Bug Summary

File: llvm/lib/Target/X86/X86InstrInfo.cpp
Warning: line 1966, column 13
Assigned value is garbage or undefined

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86InstrInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/X86 -I /build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/llvm/lib/Target/X86 -I include -I /build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-02-10-163437-15377-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220210114308+a6afa9e6b0d9/llvm/lib/Target/X86/X86InstrInfo.cpp
1//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86InstrInfo.h"
14#include "X86.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrFoldTables.h"
17#include "X86MachineFunctionInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Sequence.h"
22#include "llvm/CodeGen/LiveIntervals.h"
23#include "llvm/CodeGen/LivePhysRegs.h"
24#include "llvm/CodeGen/LiveVariables.h"
25#include "llvm/CodeGen/MachineConstantPool.h"
26#include "llvm/CodeGen/MachineDominators.h"
27#include "llvm/CodeGen/MachineFrameInfo.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineModuleInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/CodeGen/StackMaps.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Function.h"
35#include "llvm/MC/MCAsmInfo.h"
36#include "llvm/MC/MCExpr.h"
37#include "llvm/MC/MCInst.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/ErrorHandling.h"
41#include "llvm/Support/raw_ostream.h"
42#include "llvm/Target/TargetOptions.h"
43
44using namespace llvm;
45
46#define DEBUG_TYPE "x86-instr-info"
47
48#define GET_INSTRINFO_CTOR_DTOR
49#include "X86GenInstrInfo.inc"
50
51static cl::opt<bool>
52 NoFusing("disable-spill-fusing",
53 cl::desc("Disable fusing of spill code into instructions"),
54 cl::Hidden);
55static cl::opt<bool>
56PrintFailedFusing("print-failed-fuse-candidates",
57 cl::desc("Print instructions that the allocator wants to"
58 " fuse, but the X86 backend currently can't"),
59 cl::Hidden);
60static cl::opt<bool>
61ReMatPICStubLoad("remat-pic-stub-load",
62 cl::desc("Re-materialize load from stub in PIC mode"),
63 cl::init(false), cl::Hidden);
64static cl::opt<unsigned>
65PartialRegUpdateClearance("partial-reg-update-clearance",
66 cl::desc("Clearance between two register writes "
67 "for inserting XOR to avoid partial "
68 "register update"),
69 cl::init(64), cl::Hidden);
70static cl::opt<unsigned>
71UndefRegClearance("undef-reg-clearance",
72 cl::desc("How many idle instructions we would like before "
73 "certain undef register reads"),
74 cl::init(128), cl::Hidden);
75
76
77// Pin the vtable to this file.
78void X86InstrInfo::anchor() {}
79
80X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
81 : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
82 : X86::ADJCALLSTACKDOWN32),
83 (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
84 : X86::ADJCALLSTACKUP32),
85 X86::CATCHRET,
86 (STI.is64Bit() ? X86::RET64 : X86::RET32)),
87 Subtarget(STI), RI(STI.getTargetTriple()) {
88}
89
90bool
91X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
92 Register &SrcReg, Register &DstReg,
93 unsigned &SubIdx) const {
94 switch (MI.getOpcode()) {
95 default: break;
96 case X86::MOVSX16rr8:
97 case X86::MOVZX16rr8:
98 case X86::MOVSX32rr8:
99 case X86::MOVZX32rr8:
100 case X86::MOVSX64rr8:
101 if (!Subtarget.is64Bit())
102 // It's not always legal to reference the low 8-bit of the larger
103 // register in 32-bit mode.
104 return false;
105 LLVM_FALLTHROUGH;
106 case X86::MOVSX32rr16:
107 case X86::MOVZX32rr16:
108 case X86::MOVSX64rr16:
109 case X86::MOVSX64rr32: {
110 if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
111 // Be conservative.
112 return false;
113 SrcReg = MI.getOperand(1).getReg();
114 DstReg = MI.getOperand(0).getReg();
115 switch (MI.getOpcode()) {
116 default: llvm_unreachable("Unreachable!");
117 case X86::MOVSX16rr8:
118 case X86::MOVZX16rr8:
119 case X86::MOVSX32rr8:
120 case X86::MOVZX32rr8:
121 case X86::MOVSX64rr8:
122 SubIdx = X86::sub_8bit;
123 break;
124 case X86::MOVSX32rr16:
125 case X86::MOVZX32rr16:
126 case X86::MOVSX64rr16:
127 SubIdx = X86::sub_16bit;
128 break;
129 case X86::MOVSX64rr32:
130 SubIdx = X86::sub_32bit;
131 break;
132 }
133 return true;
134 }
135 }
136 return false;
137}
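
For context, a minimal sketch of how a caller such as the register coalescer might consume these outputs. This is illustrative only and not part of the annotated file; the TII pointer and MI reference are assumed to be in scope.

    Register SrcReg, DstReg;
    unsigned SubIdx;
    if (TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) {
      // For, e.g., %eax = MOVZX32rr16 %cx this reports SrcReg = %cx,
      // DstReg = %eax and SubIdx = X86::sub_16bit, i.e. the low 16 bits
      // of DstReg carry the value of SrcReg, so the two can be coalesced.
    }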
138
139bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
140 switch (MI.getOpcode()) {
141 default:
142 // By default, assume that the instruction is not data invariant.
143 return false;
144
145 // Some target-independent operations that trivially lower to data-invariant
146 // instructions.
147 case TargetOpcode::COPY:
148 case TargetOpcode::INSERT_SUBREG:
149 case TargetOpcode::SUBREG_TO_REG:
150 return true;
151
152 // On x86 it is believed that imul is constant time w.r.t. its input data.
153 // However, these instructions set flags and are perhaps the most
154 // surprisingly constant-time operations, so we call them out separately.
155 case X86::IMUL16rr:
156 case X86::IMUL16rri8:
157 case X86::IMUL16rri:
158 case X86::IMUL32rr:
159 case X86::IMUL32rri8:
160 case X86::IMUL32rri:
161 case X86::IMUL64rr:
162 case X86::IMUL64rri32:
163 case X86::IMUL64rri8:
164
165 // Bit scanning and counting instructions that are somewhat surprisingly
166 // constant time as they scan across bits and do other fairly complex
167 // operations like popcnt, but are believed to be constant time on x86.
168 // However, these set flags.
169 case X86::BSF16rr:
170 case X86::BSF32rr:
171 case X86::BSF64rr:
172 case X86::BSR16rr:
173 case X86::BSR32rr:
174 case X86::BSR64rr:
175 case X86::LZCNT16rr:
176 case X86::LZCNT32rr:
177 case X86::LZCNT64rr:
178 case X86::POPCNT16rr:
179 case X86::POPCNT32rr:
180 case X86::POPCNT64rr:
181 case X86::TZCNT16rr:
182 case X86::TZCNT32rr:
183 case X86::TZCNT64rr:
184
185 // Bit manipulation instructions are effectively combinations of basic
186 // arithmetic ops, and should still execute in constant time. These also
187 // set flags.
188 case X86::BLCFILL32rr:
189 case X86::BLCFILL64rr:
190 case X86::BLCI32rr:
191 case X86::BLCI64rr:
192 case X86::BLCIC32rr:
193 case X86::BLCIC64rr:
194 case X86::BLCMSK32rr:
195 case X86::BLCMSK64rr:
196 case X86::BLCS32rr:
197 case X86::BLCS64rr:
198 case X86::BLSFILL32rr:
199 case X86::BLSFILL64rr:
200 case X86::BLSI32rr:
201 case X86::BLSI64rr:
202 case X86::BLSIC32rr:
203 case X86::BLSIC64rr:
204 case X86::BLSMSK32rr:
205 case X86::BLSMSK64rr:
206 case X86::BLSR32rr:
207 case X86::BLSR64rr:
208 case X86::TZMSK32rr:
209 case X86::TZMSK64rr:
210
211 // Bit extracting and clearing instructions should execute in constant time,
212 // and set flags.
213 case X86::BEXTR32rr:
214 case X86::BEXTR64rr:
215 case X86::BEXTRI32ri:
216 case X86::BEXTRI64ri:
217 case X86::BZHI32rr:
218 case X86::BZHI64rr:
219
220 // Shift and rotate.
221 case X86::ROL8r1:
222 case X86::ROL16r1:
223 case X86::ROL32r1:
224 case X86::ROL64r1:
225 case X86::ROL8rCL:
226 case X86::ROL16rCL:
227 case X86::ROL32rCL:
228 case X86::ROL64rCL:
229 case X86::ROL8ri:
230 case X86::ROL16ri:
231 case X86::ROL32ri:
232 case X86::ROL64ri:
233 case X86::ROR8r1:
234 case X86::ROR16r1:
235 case X86::ROR32r1:
236 case X86::ROR64r1:
237 case X86::ROR8rCL:
238 case X86::ROR16rCL:
239 case X86::ROR32rCL:
240 case X86::ROR64rCL:
241 case X86::ROR8ri:
242 case X86::ROR16ri:
243 case X86::ROR32ri:
244 case X86::ROR64ri:
245 case X86::SAR8r1:
246 case X86::SAR16r1:
247 case X86::SAR32r1:
248 case X86::SAR64r1:
249 case X86::SAR8rCL:
250 case X86::SAR16rCL:
251 case X86::SAR32rCL:
252 case X86::SAR64rCL:
253 case X86::SAR8ri:
254 case X86::SAR16ri:
255 case X86::SAR32ri:
256 case X86::SAR64ri:
257 case X86::SHL8r1:
258 case X86::SHL16r1:
259 case X86::SHL32r1:
260 case X86::SHL64r1:
261 case X86::SHL8rCL:
262 case X86::SHL16rCL:
263 case X86::SHL32rCL:
264 case X86::SHL64rCL:
265 case X86::SHL8ri:
266 case X86::SHL16ri:
267 case X86::SHL32ri:
268 case X86::SHL64ri:
269 case X86::SHR8r1:
270 case X86::SHR16r1:
271 case X86::SHR32r1:
272 case X86::SHR64r1:
273 case X86::SHR8rCL:
274 case X86::SHR16rCL:
275 case X86::SHR32rCL:
276 case X86::SHR64rCL:
277 case X86::SHR8ri:
278 case X86::SHR16ri:
279 case X86::SHR32ri:
280 case X86::SHR64ri:
281 case X86::SHLD16rrCL:
282 case X86::SHLD32rrCL:
283 case X86::SHLD64rrCL:
284 case X86::SHLD16rri8:
285 case X86::SHLD32rri8:
286 case X86::SHLD64rri8:
287 case X86::SHRD16rrCL:
288 case X86::SHRD32rrCL:
289 case X86::SHRD64rrCL:
290 case X86::SHRD16rri8:
291 case X86::SHRD32rri8:
292 case X86::SHRD64rri8:
293
294 // Basic arithmetic is constant time on the input but does set flags.
295 case X86::ADC8rr:
296 case X86::ADC8ri:
297 case X86::ADC16rr:
298 case X86::ADC16ri:
299 case X86::ADC16ri8:
300 case X86::ADC32rr:
301 case X86::ADC32ri:
302 case X86::ADC32ri8:
303 case X86::ADC64rr:
304 case X86::ADC64ri8:
305 case X86::ADC64ri32:
306 case X86::ADD8rr:
307 case X86::ADD8ri:
308 case X86::ADD16rr:
309 case X86::ADD16ri:
310 case X86::ADD16ri8:
311 case X86::ADD32rr:
312 case X86::ADD32ri:
313 case X86::ADD32ri8:
314 case X86::ADD64rr:
315 case X86::ADD64ri8:
316 case X86::ADD64ri32:
317 case X86::AND8rr:
318 case X86::AND8ri:
319 case X86::AND16rr:
320 case X86::AND16ri:
321 case X86::AND16ri8:
322 case X86::AND32rr:
323 case X86::AND32ri:
324 case X86::AND32ri8:
325 case X86::AND64rr:
326 case X86::AND64ri8:
327 case X86::AND64ri32:
328 case X86::OR8rr:
329 case X86::OR8ri:
330 case X86::OR16rr:
331 case X86::OR16ri:
332 case X86::OR16ri8:
333 case X86::OR32rr:
334 case X86::OR32ri:
335 case X86::OR32ri8:
336 case X86::OR64rr:
337 case X86::OR64ri8:
338 case X86::OR64ri32:
339 case X86::SBB8rr:
340 case X86::SBB8ri:
341 case X86::SBB16rr:
342 case X86::SBB16ri:
343 case X86::SBB16ri8:
344 case X86::SBB32rr:
345 case X86::SBB32ri:
346 case X86::SBB32ri8:
347 case X86::SBB64rr:
348 case X86::SBB64ri8:
349 case X86::SBB64ri32:
350 case X86::SUB8rr:
351 case X86::SUB8ri:
352 case X86::SUB16rr:
353 case X86::SUB16ri:
354 case X86::SUB16ri8:
355 case X86::SUB32rr:
356 case X86::SUB32ri:
357 case X86::SUB32ri8:
358 case X86::SUB64rr:
359 case X86::SUB64ri8:
360 case X86::SUB64ri32:
361 case X86::XOR8rr:
362 case X86::XOR8ri:
363 case X86::XOR16rr:
364 case X86::XOR16ri:
365 case X86::XOR16ri8:
366 case X86::XOR32rr:
367 case X86::XOR32ri:
368 case X86::XOR32ri8:
369 case X86::XOR64rr:
370 case X86::XOR64ri8:
371 case X86::XOR64ri32:
372 // Arithmetic with just 32-bit and 64-bit variants and no immediates.
373 case X86::ADCX32rr:
374 case X86::ADCX64rr:
375 case X86::ADOX32rr:
376 case X86::ADOX64rr:
377 case X86::ANDN32rr:
378 case X86::ANDN64rr:
379 // Unary arithmetic operations.
380 case X86::DEC8r:
381 case X86::DEC16r:
382 case X86::DEC32r:
383 case X86::DEC64r:
384 case X86::INC8r:
385 case X86::INC16r:
386 case X86::INC32r:
387 case X86::INC64r:
388 case X86::NEG8r:
389 case X86::NEG16r:
390 case X86::NEG32r:
391 case X86::NEG64r:
392
393 // Unlike other arithmetic, NOT doesn't set EFLAGS.
394 case X86::NOT8r:
395 case X86::NOT16r:
396 case X86::NOT32r:
397 case X86::NOT64r:
398
399 // Various move instructions used to zero or sign extend things. Note that we
400 // intentionally don't support the _NOREX variants as we can't handle that
401 // register constraint anyways.
402 case X86::MOVSX16rr8:
403 case X86::MOVSX32rr8:
404 case X86::MOVSX32rr16:
405 case X86::MOVSX64rr8:
406 case X86::MOVSX64rr16:
407 case X86::MOVSX64rr32:
408 case X86::MOVZX16rr8:
409 case X86::MOVZX32rr8:
410 case X86::MOVZX32rr16:
411 case X86::MOVZX64rr8:
412 case X86::MOVZX64rr16:
413 case X86::MOV32rr:
414
415 // Arithmetic instructions that are both constant time and don't set flags.
416 case X86::RORX32ri:
417 case X86::RORX64ri:
418 case X86::SARX32rr:
419 case X86::SARX64rr:
420 case X86::SHLX32rr:
421 case X86::SHLX64rr:
422 case X86::SHRX32rr:
423 case X86::SHRX64rr:
424
425 // LEA doesn't actually access memory, and its arithmetic is constant time.
426 case X86::LEA16r:
427 case X86::LEA32r:
428 case X86::LEA64_32r:
429 case X86::LEA64r:
430 return true;
431 }
432}
433
434bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
435 switch (MI.getOpcode()) {
436 default:
437 // By default, assume that the load will immediately leak.
438 return false;
439
440 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
441 // However, these instructions set flags and are perhaps the most
442 // surprisingly constant-time operations, so we call them out separately.
443 case X86::IMUL16rm:
444 case X86::IMUL16rmi8:
445 case X86::IMUL16rmi:
446 case X86::IMUL32rm:
447 case X86::IMUL32rmi8:
448 case X86::IMUL32rmi:
449 case X86::IMUL64rm:
450 case X86::IMUL64rmi32:
451 case X86::IMUL64rmi8:
452
453 // Bit scanning and counting instructions that are somewhat surprisingly
454 // constant time as they scan across bits and do other fairly complex
455 // operations like popcnt, but are believed to be constant time on x86.
456 // However, these set flags.
457 case X86::BSF16rm:
458 case X86::BSF32rm:
459 case X86::BSF64rm:
460 case X86::BSR16rm:
461 case X86::BSR32rm:
462 case X86::BSR64rm:
463 case X86::LZCNT16rm:
464 case X86::LZCNT32rm:
465 case X86::LZCNT64rm:
466 case X86::POPCNT16rm:
467 case X86::POPCNT32rm:
468 case X86::POPCNT64rm:
469 case X86::TZCNT16rm:
470 case X86::TZCNT32rm:
471 case X86::TZCNT64rm:
472
473 // Bit manipulation instructions are effectively combinations of basic
474 // arithmetic ops, and should still execute in constant time. These also
475 // set flags.
476 case X86::BLCFILL32rm:
477 case X86::BLCFILL64rm:
478 case X86::BLCI32rm:
479 case X86::BLCI64rm:
480 case X86::BLCIC32rm:
481 case X86::BLCIC64rm:
482 case X86::BLCMSK32rm:
483 case X86::BLCMSK64rm:
484 case X86::BLCS32rm:
485 case X86::BLCS64rm:
486 case X86::BLSFILL32rm:
487 case X86::BLSFILL64rm:
488 case X86::BLSI32rm:
489 case X86::BLSI64rm:
490 case X86::BLSIC32rm:
491 case X86::BLSIC64rm:
492 case X86::BLSMSK32rm:
493 case X86::BLSMSK64rm:
494 case X86::BLSR32rm:
495 case X86::BLSR64rm:
496 case X86::TZMSK32rm:
497 case X86::TZMSK64rm:
498
499 // Bit extracting and clearing instructions should execute in constant time,
500 // and set flags.
501 case X86::BEXTR32rm:
502 case X86::BEXTR64rm:
503 case X86::BEXTRI32mi:
504 case X86::BEXTRI64mi:
505 case X86::BZHI32rm:
506 case X86::BZHI64rm:
507
508 // Basic arithmetic is constant time on the input but does set flags.
509 case X86::ADC8rm:
510 case X86::ADC16rm:
511 case X86::ADC32rm:
512 case X86::ADC64rm:
513 case X86::ADCX32rm:
514 case X86::ADCX64rm:
515 case X86::ADD8rm:
516 case X86::ADD16rm:
517 case X86::ADD32rm:
518 case X86::ADD64rm:
519 case X86::ADOX32rm:
520 case X86::ADOX64rm:
521 case X86::AND8rm:
522 case X86::AND16rm:
523 case X86::AND32rm:
524 case X86::AND64rm:
525 case X86::ANDN32rm:
526 case X86::ANDN64rm:
527 case X86::OR8rm:
528 case X86::OR16rm:
529 case X86::OR32rm:
530 case X86::OR64rm:
531 case X86::SBB8rm:
532 case X86::SBB16rm:
533 case X86::SBB32rm:
534 case X86::SBB64rm:
535 case X86::SUB8rm:
536 case X86::SUB16rm:
537 case X86::SUB32rm:
538 case X86::SUB64rm:
539 case X86::XOR8rm:
540 case X86::XOR16rm:
541 case X86::XOR32rm:
542 case X86::XOR64rm:
543
544 // Integer multiply w/o affecting flags is still believed to be constant
545 // time on x86. Called out separately as this is among the most surprising
546 // instructions to exhibit that behavior.
547 case X86::MULX32rm:
548 case X86::MULX64rm:
549
550 // Arithmetic instructions that are both constant time and don't set flags.
551 case X86::RORX32mi:
552 case X86::RORX64mi:
553 case X86::SARX32rm:
554 case X86::SARX64rm:
555 case X86::SHLX32rm:
556 case X86::SHLX64rm:
557 case X86::SHRX32rm:
558 case X86::SHRX64rm:
559
560 // Conversions are believed to be constant time and don't set flags.
561 case X86::CVTTSD2SI64rm:
562 case X86::VCVTTSD2SI64rm:
563 case X86::VCVTTSD2SI64Zrm:
564 case X86::CVTTSD2SIrm:
565 case X86::VCVTTSD2SIrm:
566 case X86::VCVTTSD2SIZrm:
567 case X86::CVTTSS2SI64rm:
568 case X86::VCVTTSS2SI64rm:
569 case X86::VCVTTSS2SI64Zrm:
570 case X86::CVTTSS2SIrm:
571 case X86::VCVTTSS2SIrm:
572 case X86::VCVTTSS2SIZrm:
573 case X86::CVTSI2SDrm:
574 case X86::VCVTSI2SDrm:
575 case X86::VCVTSI2SDZrm:
576 case X86::CVTSI2SSrm:
577 case X86::VCVTSI2SSrm:
578 case X86::VCVTSI2SSZrm:
579 case X86::CVTSI642SDrm:
580 case X86::VCVTSI642SDrm:
581 case X86::VCVTSI642SDZrm:
582 case X86::CVTSI642SSrm:
583 case X86::VCVTSI642SSrm:
584 case X86::VCVTSI642SSZrm:
585 case X86::CVTSS2SDrm:
586 case X86::VCVTSS2SDrm:
587 case X86::VCVTSS2SDZrm:
588 case X86::CVTSD2SSrm:
589 case X86::VCVTSD2SSrm:
590 case X86::VCVTSD2SSZrm:
591 // AVX512 added unsigned integer conversions.
592 case X86::VCVTTSD2USI64Zrm:
593 case X86::VCVTTSD2USIZrm:
594 case X86::VCVTTSS2USI64Zrm:
595 case X86::VCVTTSS2USIZrm:
596 case X86::VCVTUSI2SDZrm:
597 case X86::VCVTUSI642SDZrm:
598 case X86::VCVTUSI2SSZrm:
599 case X86::VCVTUSI642SSZrm:
600
601 // Loads to register don't set flags.
602 case X86::MOV8rm:
603 case X86::MOV8rm_NOREX:
604 case X86::MOV16rm:
605 case X86::MOV32rm:
606 case X86::MOV64rm:
607 case X86::MOVSX16rm8:
608 case X86::MOVSX32rm16:
609 case X86::MOVSX32rm8:
610 case X86::MOVSX32rm8_NOREX:
611 case X86::MOVSX64rm16:
612 case X86::MOVSX64rm32:
613 case X86::MOVSX64rm8:
614 case X86::MOVZX16rm8:
615 case X86::MOVZX32rm16:
616 case X86::MOVZX32rm8:
617 case X86::MOVZX32rm8_NOREX:
618 case X86::MOVZX64rm16:
619 case X86::MOVZX64rm8:
620 return true;
621 }
622}
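
A rough sketch of how a hardening pass (for example, speculative load hardening) might use the two predicates above. The loop below is illustrative only; the hardenValue() helper is hypothetical and not part of this file.

    for (MachineInstr &MI : MBB) {
      // Loads whose value may leak through a data-dependent side channel
      // need explicit hardening before the loaded value is used.
      if (MI.mayLoad() && !X86InstrInfo::isDataInvariantLoad(MI))
        hardenValue(MI);   // hypothetical helper
      // Data-invariant instructions can forward already-tainted inputs
      // without introducing a new leak themselves.
    }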
623
624int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
625 const MachineFunction *MF = MI.getParent()->getParent();
626 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
627
628 if (isFrameInstr(MI)) {
629 int SPAdj = alignTo(getFrameSize(MI), TFI->getStackAlign());
630 SPAdj -= getFrameAdjustment(MI);
631 if (!isFrameSetup(MI))
632 SPAdj = -SPAdj;
633 return SPAdj;
634 }
635
636 // To know whether a call adjusts the stack, we need information
637 // that is bound to the following ADJCALLSTACKUP pseudo.
638 // Look for the next ADJCALLSTACKUP that follows the call.
639 if (MI.isCall()) {
640 const MachineBasicBlock *MBB = MI.getParent();
641 auto I = ++MachineBasicBlock::const_iterator(MI);
642 for (auto E = MBB->end(); I != E; ++I) {
643 if (I->getOpcode() == getCallFrameDestroyOpcode() ||
644 I->isCall())
645 break;
646 }
647
648 // If we could not find a frame destroy opcode, then it has already
649 // been simplified, so we don't care.
650 if (I->getOpcode() != getCallFrameDestroyOpcode())
651 return 0;
652
653 return -(I->getOperand(1).getImm());
654 }
655
656 // Currently handle only PUSHes we can reasonably expect to see
657 // in call sequences
658 switch (MI.getOpcode()) {
659 default:
660 return 0;
661 case X86::PUSH32i8:
662 case X86::PUSH32r:
663 case X86::PUSH32rmm:
664 case X86::PUSH32rmr:
665 case X86::PUSHi32:
666 return 4;
667 case X86::PUSH64i8:
668 case X86::PUSH64r:
669 case X86::PUSH64rmm:
670 case X86::PUSH64rmr:
671 case X86::PUSH64i32:
672 return 8;
673 }
674}
675
676/// Return true and the FrameIndex if the specified
677/// operand and following operands form a reference to the stack frame.
678bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
679 int &FrameIndex) const {
680 if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
681 MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
682 MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
683 MI.getOperand(Op + X86::AddrDisp).isImm() &&
684 MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
685 MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
686 MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
687 FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
688 return true;
689 }
690 return false;
691}
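
To make the operand offsets above concrete, here is the assumed layout of an x86 memory reference in machine IR, which is what the X86::Addr* indices select (illustrative comment block, not part of the file):

    // An x86 memory reference occupies five consecutive operands starting
    // at index Op:
    //   Op + X86::AddrBaseReg    - base register (a frame index here)
    //   Op + X86::AddrScaleAmt   - scale immediate   (must be 1)
    //   Op + X86::AddrIndexReg   - index register    (must be no register)
    //   Op + X86::AddrDisp       - displacement      (must be 0)
    //   Op + X86::AddrSegmentReg - segment register
    // isFrameOperand() only accepts the plain "frame index + 0" form.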
692
693static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
694 switch (Opcode) {
695 default:
696 return false;
697 case X86::MOV8rm:
698 case X86::KMOVBkm:
699 MemBytes = 1;
700 return true;
701 case X86::MOV16rm:
702 case X86::KMOVWkm:
703 case X86::VMOVSHZrm:
704 case X86::VMOVSHZrm_alt:
705 MemBytes = 2;
706 return true;
707 case X86::MOV32rm:
708 case X86::MOVSSrm:
709 case X86::MOVSSrm_alt:
710 case X86::VMOVSSrm:
711 case X86::VMOVSSrm_alt:
712 case X86::VMOVSSZrm:
713 case X86::VMOVSSZrm_alt:
714 case X86::KMOVDkm:
715 MemBytes = 4;
716 return true;
717 case X86::MOV64rm:
718 case X86::LD_Fp64m:
719 case X86::MOVSDrm:
720 case X86::MOVSDrm_alt:
721 case X86::VMOVSDrm:
722 case X86::VMOVSDrm_alt:
723 case X86::VMOVSDZrm:
724 case X86::VMOVSDZrm_alt:
725 case X86::MMX_MOVD64rm:
726 case X86::MMX_MOVQ64rm:
727 case X86::KMOVQkm:
728 MemBytes = 8;
729 return true;
730 case X86::MOVAPSrm:
731 case X86::MOVUPSrm:
732 case X86::MOVAPDrm:
733 case X86::MOVUPDrm:
734 case X86::MOVDQArm:
735 case X86::MOVDQUrm:
736 case X86::VMOVAPSrm:
737 case X86::VMOVUPSrm:
738 case X86::VMOVAPDrm:
739 case X86::VMOVUPDrm:
740 case X86::VMOVDQArm:
741 case X86::VMOVDQUrm:
742 case X86::VMOVAPSZ128rm:
743 case X86::VMOVUPSZ128rm:
744 case X86::VMOVAPSZ128rm_NOVLX:
745 case X86::VMOVUPSZ128rm_NOVLX:
746 case X86::VMOVAPDZ128rm:
747 case X86::VMOVUPDZ128rm:
748 case X86::VMOVDQU8Z128rm:
749 case X86::VMOVDQU16Z128rm:
750 case X86::VMOVDQA32Z128rm:
751 case X86::VMOVDQU32Z128rm:
752 case X86::VMOVDQA64Z128rm:
753 case X86::VMOVDQU64Z128rm:
754 MemBytes = 16;
755 return true;
756 case X86::VMOVAPSYrm:
757 case X86::VMOVUPSYrm:
758 case X86::VMOVAPDYrm:
759 case X86::VMOVUPDYrm:
760 case X86::VMOVDQAYrm:
761 case X86::VMOVDQUYrm:
762 case X86::VMOVAPSZ256rm:
763 case X86::VMOVUPSZ256rm:
764 case X86::VMOVAPSZ256rm_NOVLX:
765 case X86::VMOVUPSZ256rm_NOVLX:
766 case X86::VMOVAPDZ256rm:
767 case X86::VMOVUPDZ256rm:
768 case X86::VMOVDQU8Z256rm:
769 case X86::VMOVDQU16Z256rm:
770 case X86::VMOVDQA32Z256rm:
771 case X86::VMOVDQU32Z256rm:
772 case X86::VMOVDQA64Z256rm:
773 case X86::VMOVDQU64Z256rm:
774 MemBytes = 32;
775 return true;
776 case X86::VMOVAPSZrm:
777 case X86::VMOVUPSZrm:
778 case X86::VMOVAPDZrm:
779 case X86::VMOVUPDZrm:
780 case X86::VMOVDQU8Zrm:
781 case X86::VMOVDQU16Zrm:
782 case X86::VMOVDQA32Zrm:
783 case X86::VMOVDQU32Zrm:
784 case X86::VMOVDQA64Zrm:
785 case X86::VMOVDQU64Zrm:
786 MemBytes = 64;
787 return true;
788 }
789}
790
791static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
792 switch (Opcode) {
793 default:
794 return false;
795 case X86::MOV8mr:
796 case X86::KMOVBmk:
797 MemBytes = 1;
798 return true;
799 case X86::MOV16mr:
800 case X86::KMOVWmk:
801 case X86::VMOVSHZmr:
802 MemBytes = 2;
803 return true;
804 case X86::MOV32mr:
805 case X86::MOVSSmr:
806 case X86::VMOVSSmr:
807 case X86::VMOVSSZmr:
808 case X86::KMOVDmk:
809 MemBytes = 4;
810 return true;
811 case X86::MOV64mr:
812 case X86::ST_FpP64m:
813 case X86::MOVSDmr:
814 case X86::VMOVSDmr:
815 case X86::VMOVSDZmr:
816 case X86::MMX_MOVD64mr:
817 case X86::MMX_MOVQ64mr:
818 case X86::MMX_MOVNTQmr:
819 case X86::KMOVQmk:
820 MemBytes = 8;
821 return true;
822 case X86::MOVAPSmr:
823 case X86::MOVUPSmr:
824 case X86::MOVAPDmr:
825 case X86::MOVUPDmr:
826 case X86::MOVDQAmr:
827 case X86::MOVDQUmr:
828 case X86::VMOVAPSmr:
829 case X86::VMOVUPSmr:
830 case X86::VMOVAPDmr:
831 case X86::VMOVUPDmr:
832 case X86::VMOVDQAmr:
833 case X86::VMOVDQUmr:
834 case X86::VMOVUPSZ128mr:
835 case X86::VMOVAPSZ128mr:
836 case X86::VMOVUPSZ128mr_NOVLX:
837 case X86::VMOVAPSZ128mr_NOVLX:
838 case X86::VMOVUPDZ128mr:
839 case X86::VMOVAPDZ128mr:
840 case X86::VMOVDQA32Z128mr:
841 case X86::VMOVDQU32Z128mr:
842 case X86::VMOVDQA64Z128mr:
843 case X86::VMOVDQU64Z128mr:
844 case X86::VMOVDQU8Z128mr:
845 case X86::VMOVDQU16Z128mr:
846 MemBytes = 16;
847 return true;
848 case X86::VMOVUPSYmr:
849 case X86::VMOVAPSYmr:
850 case X86::VMOVUPDYmr:
851 case X86::VMOVAPDYmr:
852 case X86::VMOVDQUYmr:
853 case X86::VMOVDQAYmr:
854 case X86::VMOVUPSZ256mr:
855 case X86::VMOVAPSZ256mr:
856 case X86::VMOVUPSZ256mr_NOVLX:
857 case X86::VMOVAPSZ256mr_NOVLX:
858 case X86::VMOVUPDZ256mr:
859 case X86::VMOVAPDZ256mr:
860 case X86::VMOVDQU8Z256mr:
861 case X86::VMOVDQU16Z256mr:
862 case X86::VMOVDQA32Z256mr:
863 case X86::VMOVDQU32Z256mr:
864 case X86::VMOVDQA64Z256mr:
865 case X86::VMOVDQU64Z256mr:
866 MemBytes = 32;
867 return true;
868 case X86::VMOVUPSZmr:
869 case X86::VMOVAPSZmr:
870 case X86::VMOVUPDZmr:
871 case X86::VMOVAPDZmr:
872 case X86::VMOVDQU8Zmr:
873 case X86::VMOVDQU16Zmr:
874 case X86::VMOVDQA32Zmr:
875 case X86::VMOVDQU32Zmr:
876 case X86::VMOVDQA64Zmr:
877 case X86::VMOVDQU64Zmr:
878 MemBytes = 64;
879 return true;
880 }
881 return false;
882}
883
884unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
885 int &FrameIndex) const {
886 unsigned Dummy;
887 return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
888}
889
890unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
891 int &FrameIndex,
892 unsigned &MemBytes) const {
893 if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
894 if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
895 return MI.getOperand(0).getReg();
896 return 0;
897}
898
899unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
900 int &FrameIndex) const {
901 unsigned Dummy;
902 if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
903 unsigned Reg;
904 if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
905 return Reg;
906 // Check for post-frame index elimination operations
907 SmallVector<const MachineMemOperand *, 1> Accesses;
908 if (hasLoadFromStackSlot(MI, Accesses)) {
909 FrameIndex =
910 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
911 ->getFrameIndex();
912 return MI.getOperand(0).getReg();
913 }
914 }
915 return 0;
916}
917
918unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
919 int &FrameIndex) const {
920 unsigned Dummy;
921 return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
922}
923
924unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
925 int &FrameIndex,
926 unsigned &MemBytes) const {
927 if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
928 if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
929 isFrameOperand(MI, 0, FrameIndex))
930 return MI.getOperand(X86::AddrNumOperands).getReg();
931 return 0;
932}
933
934unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
935 int &FrameIndex) const {
936 unsigned Dummy;
937 if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
938 unsigned Reg;
939 if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
940 return Reg;
941 // Check for post-frame index elimination operations
942 SmallVector<const MachineMemOperand *, 1> Accesses;
943 if (hasStoreToStackSlot(MI, Accesses)) {
944 FrameIndex =
945 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
946 ->getFrameIndex();
947 return MI.getOperand(X86::AddrNumOperands).getReg();
948 }
949 }
950 return 0;
951}
952
953/// Return true if register is PIC base, i.e. defined by X86::MOVPC32r.
954static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
955 // Don't waste compile time scanning use-def chains of physregs.
956 if (!BaseReg.isVirtual())
957 return false;
958 bool isPICBase = false;
959 for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
960 E = MRI.def_instr_end(); I != E; ++I) {
961 MachineInstr *DefMI = &*I;
962 if (DefMI->getOpcode() != X86::MOVPC32r)
963 return false;
964 assert(!isPICBase && "More than one PIC base?");
965 isPICBase = true;
966 }
967 return isPICBase;
968}
969
970bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
971 AAResults *AA) const {
972 switch (MI.getOpcode()) {
973 default:
974 // This function should only be called for opcodes with the ReMaterializable
975 // flag set.
976 llvm_unreachable("Unknown rematerializable operation!");
977 break;
978
979 case X86::LOAD_STACK_GUARD:
980 case X86::AVX1_SETALLONES:
981 case X86::AVX2_SETALLONES:
982 case X86::AVX512_128_SET0:
983 case X86::AVX512_256_SET0:
984 case X86::AVX512_512_SET0:
985 case X86::AVX512_512_SETALLONES:
986 case X86::AVX512_FsFLD0SD:
987 case X86::AVX512_FsFLD0SH:
988 case X86::AVX512_FsFLD0SS:
989 case X86::AVX512_FsFLD0F128:
990 case X86::AVX_SET0:
991 case X86::FsFLD0SD:
992 case X86::FsFLD0SS:
993 case X86::FsFLD0F128:
994 case X86::KSET0D:
995 case X86::KSET0Q:
996 case X86::KSET0W:
997 case X86::KSET1D:
998 case X86::KSET1Q:
999 case X86::KSET1W:
1000 case X86::MMX_SET0:
1001 case X86::MOV32ImmSExti8:
1002 case X86::MOV32r0:
1003 case X86::MOV32r1:
1004 case X86::MOV32r_1:
1005 case X86::MOV32ri64:
1006 case X86::MOV64ImmSExti8:
1007 case X86::V_SET0:
1008 case X86::V_SETALLONES:
1009 case X86::MOV16ri:
1010 case X86::MOV32ri:
1011 case X86::MOV64ri:
1012 case X86::MOV64ri32:
1013 case X86::MOV8ri:
1014 case X86::PTILEZEROV:
1015 return true;
1016
1017 case X86::MOV8rm:
1018 case X86::MOV8rm_NOREX:
1019 case X86::MOV16rm:
1020 case X86::MOV32rm:
1021 case X86::MOV64rm:
1022 case X86::MOVSSrm:
1023 case X86::MOVSSrm_alt:
1024 case X86::MOVSDrm:
1025 case X86::MOVSDrm_alt:
1026 case X86::MOVAPSrm:
1027 case X86::MOVUPSrm:
1028 case X86::MOVAPDrm:
1029 case X86::MOVUPDrm:
1030 case X86::MOVDQArm:
1031 case X86::MOVDQUrm:
1032 case X86::VMOVSSrm:
1033 case X86::VMOVSSrm_alt:
1034 case X86::VMOVSDrm:
1035 case X86::VMOVSDrm_alt:
1036 case X86::VMOVAPSrm:
1037 case X86::VMOVUPSrm:
1038 case X86::VMOVAPDrm:
1039 case X86::VMOVUPDrm:
1040 case X86::VMOVDQArm:
1041 case X86::VMOVDQUrm:
1042 case X86::VMOVAPSYrm:
1043 case X86::VMOVUPSYrm:
1044 case X86::VMOVAPDYrm:
1045 case X86::VMOVUPDYrm:
1046 case X86::VMOVDQAYrm:
1047 case X86::VMOVDQUYrm:
1048 case X86::MMX_MOVD64rm:
1049 case X86::MMX_MOVQ64rm:
1050 // AVX-512
1051 case X86::VMOVSSZrm:
1052 case X86::VMOVSSZrm_alt:
1053 case X86::VMOVSDZrm:
1054 case X86::VMOVSDZrm_alt:
1055 case X86::VMOVSHZrm:
1056 case X86::VMOVSHZrm_alt:
1057 case X86::VMOVAPDZ128rm:
1058 case X86::VMOVAPDZ256rm:
1059 case X86::VMOVAPDZrm:
1060 case X86::VMOVAPSZ128rm:
1061 case X86::VMOVAPSZ256rm:
1062 case X86::VMOVAPSZ128rm_NOVLX:
1063 case X86::VMOVAPSZ256rm_NOVLX:
1064 case X86::VMOVAPSZrm:
1065 case X86::VMOVDQA32Z128rm:
1066 case X86::VMOVDQA32Z256rm:
1067 case X86::VMOVDQA32Zrm:
1068 case X86::VMOVDQA64Z128rm:
1069 case X86::VMOVDQA64Z256rm:
1070 case X86::VMOVDQA64Zrm:
1071 case X86::VMOVDQU16Z128rm:
1072 case X86::VMOVDQU16Z256rm:
1073 case X86::VMOVDQU16Zrm:
1074 case X86::VMOVDQU32Z128rm:
1075 case X86::VMOVDQU32Z256rm:
1076 case X86::VMOVDQU32Zrm:
1077 case X86::VMOVDQU64Z128rm:
1078 case X86::VMOVDQU64Z256rm:
1079 case X86::VMOVDQU64Zrm:
1080 case X86::VMOVDQU8Z128rm:
1081 case X86::VMOVDQU8Z256rm:
1082 case X86::VMOVDQU8Zrm:
1083 case X86::VMOVUPDZ128rm:
1084 case X86::VMOVUPDZ256rm:
1085 case X86::VMOVUPDZrm:
1086 case X86::VMOVUPSZ128rm:
1087 case X86::VMOVUPSZ256rm:
1088 case X86::VMOVUPSZ128rm_NOVLX:
1089 case X86::VMOVUPSZ256rm_NOVLX:
1090 case X86::VMOVUPSZrm: {
1091 // Loads from constant pools are trivially rematerializable.
1092 if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
1093 MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
1094 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
1095 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
1096 MI.isDereferenceableInvariantLoad(AA)) {
1097 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
1098 if (BaseReg == 0 || BaseReg == X86::RIP)
1099 return true;
1100 // Allow re-materialization of PIC load.
1101 if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
1102 return false;
1103 const MachineFunction &MF = *MI.getParent()->getParent();
1104 const MachineRegisterInfo &MRI = MF.getRegInfo();
1105 return regIsPICBase(BaseReg, MRI);
1106 }
1107 return false;
1108 }
1109
1110 case X86::LEA32r:
1111 case X86::LEA64r: {
1112 if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
1113 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
1114 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
1115 !MI.getOperand(1 + X86::AddrDisp).isReg()) {
1116 // lea fi#, lea GV, etc. are all rematerializable.
1117 if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
1118 return true;
1119 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
1120 if (BaseReg == 0)
1121 return true;
1122 // Allow re-materialization of lea PICBase + x.
1123 const MachineFunction &MF = *MI.getParent()->getParent();
1124 const MachineRegisterInfo &MRI = MF.getRegInfo();
1125 return regIsPICBase(BaseReg, MRI);
1126 }
1127 return false;
1128 }
1129 }
1130}
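
For illustration, a sketch (in pseudo machine IR, not taken from the file) of the kind of load the constant-pool check above accepts as trivially rematerializable:

    //   %v:vr128 = MOVAPSrm $rip, 1, $noreg, %const.0, $noreg
    // The base is RIP (or no register), there is no index register, and the
    // access is a dereferenceable invariant load, so the instruction can
    // simply be re-executed at the new location instead of being spilled
    // and reloaded.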
1131
1132void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
1133 MachineBasicBlock::iterator I,
1134 Register DestReg, unsigned SubIdx,
1135 const MachineInstr &Orig,
1136 const TargetRegisterInfo &TRI) const {
1137 bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
1138 if (ClobbersEFLAGS && MBB.computeRegisterLiveness(&TRI, X86::EFLAGS, I) !=
1139 MachineBasicBlock::LQR_Dead) {
1140 // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
1141 // effects.
1142 int Value;
1143 switch (Orig.getOpcode()) {
1144 case X86::MOV32r0: Value = 0; break;
1145 case X86::MOV32r1: Value = 1; break;
1146 case X86::MOV32r_1: Value = -1; break;
1147 default:
1148 llvm_unreachable("Unexpected instruction!");
1149 }
1150
1151 const DebugLoc &DL = Orig.getDebugLoc();
1152 BuildMI(MBB, I, DL, get(X86::MOV32ri))
1153 .add(Orig.getOperand(0))
1154 .addImm(Value);
1155 } else {
1156 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1157 MBB.insert(I, MI);
1158 }
1159
1160 MachineInstr &NewMI = *std::prev(I);
1161 NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1162}
1163
1164/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
1165bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
1166 for (const MachineOperand &MO : MI.operands()) {
1167 if (MO.isReg() && MO.isDef() &&
1168 MO.getReg() == X86::EFLAGS && !MO.isDead()) {
1169 return true;
1170 }
1171 }
1172 return false;
1173}
1174
1175/// Return the shift count of a machine operand, truncated the way the hardware truncates it.
1176inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
1177 unsigned ShiftAmtOperandIdx) {
1178 // The shift count is six bits with the REX.W prefix and five bits without.
1179 unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
1180 unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
1181 return Imm & ShiftCountMask;
1182}
1183
1184/// Check whether the given shift count is appropriate, i.e. whether it
1185/// can be represented by a LEA instruction.
1186inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
1187 // Left shift instructions can be transformed into load-effective-address
1188 // instructions if we can encode them appropriately.
1189 // A LEA instruction utilizes a SIB byte to encode its scale factor.
1190 // The SIB.scale field is two bits wide which means that we can encode any
1191 // shift amount less than 4.
1192 return ShAmt < 4 && ShAmt > 0;
1193}
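
As a worked illustration (assuming the standard SIB encoding; not text from the file), the only left-shift amounts an LEA can absorb are 1, 2 and 3, which become scale factors 2, 4 and 8:

    // shl $1, %reg   ==>   lea (,%reg,2), %reg      (scale = 1 << 1)
    // shl $3, %reg   ==>   lea (,%reg,8), %reg      (scale = 1 << 3)
    // shl $4, %reg   has no LEA form: SIB.scale is only two bits wide.
    // Note the count is masked first, so SHL64ri with immediate 68 behaves
    // like a shift by 68 & 63 == 4 (see getTruncatedShiftCount above).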
1194
1195bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
1196 unsigned Opc, bool AllowSP, Register &NewSrc,
1197 bool &isKill, MachineOperand &ImplicitOp,
1198 LiveVariables *LV, LiveIntervals *LIS) const {
1199 MachineFunction &MF = *MI.getParent()->getParent();
1200 const TargetRegisterClass *RC;
1201 if (AllowSP) {
1202 RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
1203 } else {
1204 RC = Opc != X86::LEA32r ?
1205 &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
1206 }
1207 Register SrcReg = Src.getReg();
1208 isKill = MI.killsRegister(SrcReg);
1209
1210 // For both LEA64 and LEA32 the register already has essentially the right
1211 // type (32-bit or 64-bit); we may just need to forbid SP.
1212 if (Opc != X86::LEA64_32r) {
1213 NewSrc = SrcReg;
1214 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1215
1216 if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
1217 return false;
1218
1219 return true;
1220 }
1221
1222 // This is for an LEA64_32r and incoming registers are 32-bit. One way or
1223 // another we need to add 64-bit registers to the final MI.
1224 if (SrcReg.isPhysical()) {
1225 ImplicitOp = Src;
1226 ImplicitOp.setImplicit();
1227
1228 NewSrc = getX86SubSuperRegister(SrcReg, 64);
1229 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1230 } else {
1231 // Virtual register of the wrong class, we have to create a temporary 64-bit
1232 // vreg to feed into the LEA.
1233 NewSrc = MF.getRegInfo().createVirtualRegister(RC);
1234 MachineInstr *Copy =
1235 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1236 .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
1237 .addReg(SrcReg, getKillRegState(isKill));
1238
1239 // Which is obviously going to be dead after we're done with it.
1240 isKill = true;
1241
1242 if (LV)
1243 LV->replaceKillInstruction(SrcReg, MI, *Copy);
1244
1245 if (LIS) {
1246 SlotIndex CopyIdx = LIS->InsertMachineInstrInMaps(*Copy);
1247 SlotIndex Idx = LIS->getInstructionIndex(MI);
1248 LiveInterval &LI = LIS->getInterval(SrcReg);
1249 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
1250 if (S->end.getBaseIndex() == Idx)
1251 S->end = CopyIdx.getRegSlot();
1252 }
1253 }
1254
1255 // We've set all the parameters without issue.
1256 return true;
1257}
1258
1259MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
1260 MachineInstr &MI,
1261 LiveVariables *LV,
1262 LiveIntervals *LIS,
1263 bool Is8BitOp) const {
1264 // We handle 8-bit adds and various 16-bit opcodes in the switch below.
1265 MachineBasicBlock &MBB = *MI.getParent();
1266 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
1267 assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
1268 *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
1269 "Unexpected type for LEA transform");
1270
1271 // TODO: For a 32-bit target, we need to adjust the LEA variables with
1272 // something like this:
1273 // Opcode = X86::LEA32r;
1274 // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1275 // OutRegLEA =
1276 // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
1277 // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
1278 if (!Subtarget.is64Bit())
1279 return nullptr;
1280
1281 unsigned Opcode = X86::LEA64_32r;
1282 Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1283 Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
1284 Register InRegLEA2;
1285
1286 // Build and insert into an implicit UNDEF value. This is OK because
1287 // we will be shifting and then extracting the lower 8/16-bits.
1288 // This has the potential to cause partial register stall. e.g.
1289 // movw (%rbp,%rcx,2), %dx
1290 // leal -65(%rdx), %esi
1291 // But testing has shown this *does* help performance in 64-bit mode (at
1292 // least on modern x86 machines).
1293 MachineBasicBlock::iterator MBBI = MI.getIterator();
1294 Register Dest = MI.getOperand(0).getReg();
1295 Register Src = MI.getOperand(1).getReg();
1296 Register Src2;
1297 bool IsDead = MI.getOperand(0).isDead();
1298 bool IsKill = MI.getOperand(1).isKill();
1299 unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
1300 assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
1301 MachineInstr *ImpDef =
1302 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
1303 MachineInstr *InsMI =
1304 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1305 .addReg(InRegLEA, RegState::Define, SubReg)
1306 .addReg(Src, getKillRegState(IsKill));
1307 MachineInstr *ImpDef2 = nullptr;
1308 MachineInstr *InsMI2 = nullptr;
1309
1310 MachineInstrBuilder MIB =
1311 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
1312 switch (MIOpc) {
1313 default: llvm_unreachable("Unreachable!");
1314 case X86::SHL8ri:
1315 case X86::SHL16ri: {
1316 unsigned ShAmt = MI.getOperand(2).getImm();
1317 MIB.addReg(0).addImm(1ULL << ShAmt)
1318 .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
1319 break;
1320 }
1321 case X86::INC8r:
1322 case X86::INC16r:
1323 addRegOffset(MIB, InRegLEA, true, 1);
1324 break;
1325 case X86::DEC8r:
1326 case X86::DEC16r:
1327 addRegOffset(MIB, InRegLEA, true, -1);
1328 break;
1329 case X86::ADD8ri:
1330 case X86::ADD8ri_DB:
1331 case X86::ADD16ri:
1332 case X86::ADD16ri8:
1333 case X86::ADD16ri_DB:
1334 case X86::ADD16ri8_DB:
1335 addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
1336 break;
1337 case X86::ADD8rr:
1338 case X86::ADD8rr_DB:
1339 case X86::ADD16rr:
1340 case X86::ADD16rr_DB: {
1341 Src2 = MI.getOperand(2).getReg();
1342 bool IsKill2 = MI.getOperand(2).isKill();
1343 assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
1344 if (Src == Src2) {
1345 // ADD8rr/ADD16rr killed %reg1028, %reg1028
1346 // just a single insert_subreg.
1347 addRegReg(MIB, InRegLEA, true, InRegLEA, false);
1348 } else {
1349 if (Subtarget.is64Bit())
1350 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1351 else
1352 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1353 // Build and insert into an implicit UNDEF value. This is OK because
1354 // we will be shifting and then extracting the lower 8/16-bits.
1355 ImpDef2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF),
1356 InRegLEA2);
1357 InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
1358 .addReg(InRegLEA2, RegState::Define, SubReg)
1359 .addReg(Src2, getKillRegState(IsKill2));
1360 addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
1361 }
1362 if (LV && IsKill2 && InsMI2)
1363 LV->replaceKillInstruction(Src2, MI, *InsMI2);
1364 break;
1365 }
1366 }
1367
1368 MachineInstr *NewMI = MIB;
1369 MachineInstr *ExtMI =
1370 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1371 .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
1372 .addReg(OutRegLEA, RegState::Kill, SubReg);
1373
1374 if (LV) {
1375 // Update live variables.
1376 LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
1377 LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
1378 if (IsKill)
1379 LV->replaceKillInstruction(Src, MI, *InsMI);
1380 if (IsDead)
1381 LV->replaceKillInstruction(Dest, MI, *ExtMI);
1382 }
1383
1384 if (LIS) {
1385 LIS->InsertMachineInstrInMaps(*ImpDef);
1386 SlotIndex InsIdx = LIS->InsertMachineInstrInMaps(*InsMI);
1387 if (ImpDef2)
1388 LIS->InsertMachineInstrInMaps(*ImpDef2);
1389 SlotIndex Ins2Idx;
1390 if (InsMI2)
1391 Ins2Idx = LIS->InsertMachineInstrInMaps(*InsMI2);
1392 SlotIndex NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
1393 SlotIndex ExtIdx = LIS->InsertMachineInstrInMaps(*ExtMI);
1394 LIS->getInterval(InRegLEA);
1395 LIS->getInterval(OutRegLEA);
1396 if (InRegLEA2)
1397 LIS->getInterval(InRegLEA2);
1398
1399 // Move the use of Src up to InsMI.
1400 LiveInterval &SrcLI = LIS->getInterval(Src);
1401 LiveRange::Segment *SrcSeg = SrcLI.getSegmentContaining(NewIdx);
1402 if (SrcSeg->end == NewIdx.getRegSlot())
1403 SrcSeg->end = InsIdx.getRegSlot();
1404
1405 if (InsMI2) {
1406 // Move the use of Src2 up to InsMI2.
1407 LiveInterval &Src2LI = LIS->getInterval(Src2);
1408 LiveRange::Segment *Src2Seg = Src2LI.getSegmentContaining(NewIdx);
1409 if (Src2Seg->end == NewIdx.getRegSlot())
1410 Src2Seg->end = Ins2Idx.getRegSlot();
1411 }
1412
1413 // Move the definition of Dest down to ExtMI.
1414 LiveInterval &DestLI = LIS->getInterval(Dest);
1415 LiveRange::Segment *DestSeg =
1416 DestLI.getSegmentContaining(NewIdx.getRegSlot());
1417 assert(DestSeg->start == NewIdx.getRegSlot() &&
1418 DestSeg->valno->def == NewIdx.getRegSlot());
1419 DestSeg->start = ExtIdx.getRegSlot();
1420 DestSeg->valno->def = ExtIdx.getRegSlot();
1421 }
1422
1423 return ExtMI;
1424}
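
A hedged sketch of the sequence this helper builds for a 16-bit add immediate, written as pseudo machine IR purely for illustration (virtual register names are made up):

    //   %in:gr64_nosp   = IMPLICIT_DEF                          ; ImpDef
    //   %in.sub_16bit   = COPY %src                             ; InsMI
    //   %out:gr32       = LEA64_32r %in, 1, $noreg, 5, $noreg   ; ADD16ri x, 5
    //   %dst:gr16       = COPY %out.sub_16bit                   ; ExtMI
    // The narrow value is widened into a 64-bit register, the arithmetic is
    // done with an LEA (which never touches EFLAGS), and the low 16 bits of
    // the result are copied back out.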
1425
1426/// This method must be implemented by targets that
1427/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
1428/// may be able to convert a two-address instruction into a true
1429/// three-address instruction on demand. This allows the X86 target (for
1430/// example) to convert ADD and SHL instructions into LEA instructions if they
1431/// would require register copies due to two-addressness.
1432///
1433/// This method returns a null pointer if the transformation cannot be
1434/// performed, otherwise it returns the new instruction.
1435///
1436MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
1437 LiveVariables *LV,
1438 LiveIntervals *LIS) const {
1439 // The following opcodes also set the condition code register(s). Only
1440 // convert them to an equivalent LEA if the condition code register defs
1441 // are dead!
1442 if (hasLiveCondCodeDef(MI))
1443 return nullptr;
1444
1445 MachineFunction &MF = *MI.getParent()->getParent();
1446 // All input instructions are two-addr instructions. Get the known operands.
1447 const MachineOperand &Dest = MI.getOperand(0);
1448 const MachineOperand &Src = MI.getOperand(1);
1449
1450 // Ideally, operations with undef should be folded before we get here, but we
1451 // can't guarantee it. Bail out because optimizing undefs is a waste of time.
1452 // Without this, we have to forward undef state to new register operands to
1453 // avoid machine verifier errors.
1454 if (Src.isUndef())
1455 return nullptr;
1456 if (MI.getNumOperands() > 2)
1457 if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
1458 return nullptr;
1459
1460 MachineInstr *NewMI = nullptr;
1461 Register SrcReg, SrcReg2;
1462 bool Is64Bit = Subtarget.is64Bit();
1463
1464 bool Is8BitOp = false;
1465 unsigned MIOpc = MI.getOpcode();
1466 switch (MIOpc) {
1467 default: llvm_unreachable("Unreachable!");
1468 case X86::SHL64ri: {
1469 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1470 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1471 if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
1472
1473 // LEA can't handle RSP.
1474 if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass(
1475 Src.getReg(), &X86::GR64_NOSPRegClass))
1476 return nullptr;
1477
1478 NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
1479 .add(Dest)
1480 .addReg(0)
1481 .addImm(1ULL << ShAmt)
1482 .add(Src)
1483 .addImm(0)
1484 .addReg(0);
1485 break;
1486 }
1487 case X86::SHL32ri: {
1488 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1489 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1490 if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
1491
1492 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1493
1494 // LEA can't handle ESP.
1495 bool isKill;
1496 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1497 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1498 ImplicitOp, LV, LIS))
1499 return nullptr;
1500
1501 MachineInstrBuilder MIB =
1502 BuildMI(MF, MI.getDebugLoc(), get(Opc))
1503 .add(Dest)
1504 .addReg(0)
1505 .addImm(1ULL << ShAmt)
1506 .addReg(SrcReg, getKillRegState(isKill))
1507 .addImm(0)
1508 .addReg(0);
1509 if (ImplicitOp.getReg() != 0)
1510 MIB.add(ImplicitOp);
1511 NewMI = MIB;
1512
1513 break;
1514 }
1515 case X86::SHL8ri:
1516 Is8BitOp = true;
1517 LLVM_FALLTHROUGH;
1518 case X86::SHL16ri: {
1519 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1520 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1521 if (!isTruncatedShiftCountForLEA(ShAmt))
1522 return nullptr;
1523 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1524 }
1525 case X86::INC64r:
1526 case X86::INC32r: {
1527 assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
1528 unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
1529 (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1530 bool isKill;
1531 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1532 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1533 ImplicitOp, LV, LIS))
1534 return nullptr;
1535
1536 MachineInstrBuilder MIB =
1537 BuildMI(MF, MI.getDebugLoc(), get(Opc))
1538 .add(Dest)
1539 .addReg(SrcReg, getKillRegState(isKill));
1540 if (ImplicitOp.getReg() != 0)
1541 MIB.add(ImplicitOp);
1542
1543 NewMI = addOffset(MIB, 1);
1544 break;
1545 }
1546 case X86::DEC64r:
1547 case X86::DEC32r: {
1548 assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
1549 unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
1550 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1551
1552 bool isKill;
1553 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1554 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1555 ImplicitOp, LV, LIS))
1556 return nullptr;
1557
1558 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1559 .add(Dest)
1560 .addReg(SrcReg, getKillRegState(isKill));
1561 if (ImplicitOp.getReg() != 0)
1562 MIB.add(ImplicitOp);
1563
1564 NewMI = addOffset(MIB, -1);
1565
1566 break;
1567 }
1568 case X86::DEC8r:
1569 case X86::INC8r:
1570 Is8BitOp = true;
1571 LLVM_FALLTHROUGH;
1572 case X86::DEC16r:
1573 case X86::INC16r:
1574 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1575 case X86::ADD64rr:
1576 case X86::ADD64rr_DB:
1577 case X86::ADD32rr:
1578 case X86::ADD32rr_DB: {
1579 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1580 unsigned Opc;
1581 if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
1582 Opc = X86::LEA64r;
1583 else
1584 Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1585
1586 const MachineOperand &Src2 = MI.getOperand(2);
1587 bool isKill2;
1588 MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
1589 if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/false, SrcReg2, isKill2,
1590 ImplicitOp2, LV, LIS))
1591 return nullptr;
1592
1593 bool isKill;
1594 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1595 if (Src.getReg() == Src2.getReg()) {
1596 // Don't call classifyLEAReg a second time on the same register, in case
1597 // the first call inserted a COPY from Src2 and marked it as killed.
1598 isKill = isKill2;
1599 SrcReg = SrcReg2;
1600 } else {
1601 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1602 ImplicitOp, LV, LIS))
1603 return nullptr;
1604 }
1605
1606 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
1607 if (ImplicitOp.getReg() != 0)
1608 MIB.add(ImplicitOp);
1609 if (ImplicitOp2.getReg() != 0)
1610 MIB.add(ImplicitOp2);
1611
1612 NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
1613 if (LV && Src2.isKill())
1614 LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
1615 break;
1616 }
1617 case X86::ADD8rr:
1618 case X86::ADD8rr_DB:
1619 Is8BitOp = true;
1620 LLVM_FALLTHROUGH;
1621 case X86::ADD16rr:
1622 case X86::ADD16rr_DB:
1623 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1624 case X86::ADD64ri32:
1625 case X86::ADD64ri8:
1626 case X86::ADD64ri32_DB:
1627 case X86::ADD64ri8_DB:
1628 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1629 NewMI = addOffset(
1630 BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
1631 MI.getOperand(2));
1632 break;
1633 case X86::ADD32ri:
1634 case X86::ADD32ri8:
1635 case X86::ADD32ri_DB:
1636 case X86::ADD32ri8_DB: {
1637 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1638 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1639
1640 bool isKill;
1641 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1642 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1643 ImplicitOp, LV, LIS))
1644 return nullptr;
1645
1646 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1647 .add(Dest)
1648 .addReg(SrcReg, getKillRegState(isKill));
1649 if (ImplicitOp.getReg() != 0)
1650 MIB.add(ImplicitOp);
1651
1652 NewMI = addOffset(MIB, MI.getOperand(2));
1653 break;
1654 }
1655 case X86::ADD8ri:
1656 case X86::ADD8ri_DB:
1657 Is8BitOp = true;
1658 LLVM_FALLTHROUGH;
1659 case X86::ADD16ri:
1660 case X86::ADD16ri8:
1661 case X86::ADD16ri_DB:
1662 case X86::ADD16ri8_DB:
1663 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1664 case X86::SUB8ri:
1665 case X86::SUB16ri8:
1666 case X86::SUB16ri:
1667 /// FIXME: Support these similarly to ADD8ri/ADD16ri*.
1668 return nullptr;
1669 case X86::SUB32ri8:
1670 case X86::SUB32ri: {
1671 if (!MI.getOperand(2).isImm())
1672 return nullptr;
1673 int64_t Imm = MI.getOperand(2).getImm();
1674 if (!isInt<32>(-Imm))
1675 return nullptr;
1676
1677 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1678 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1679
1680 bool isKill;
1681 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1682 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1683 ImplicitOp, LV, LIS))
1684 return nullptr;
1685
1686 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1687 .add(Dest)
1688 .addReg(SrcReg, getKillRegState(isKill));
1689 if (ImplicitOp.getReg() != 0)
1690 MIB.add(ImplicitOp);
1691
1692 NewMI = addOffset(MIB, -Imm);
1693 break;
1694 }
1695
1696 case X86::SUB64ri8:
1697 case X86::SUB64ri32: {
1698 if (!MI.getOperand(2).isImm())
1699 return nullptr;
1700 int64_t Imm = MI.getOperand(2).getImm();
1701 if (!isInt<32>(-Imm))
1702 return nullptr;
1703
1704 assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
1705
1706 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
1707 get(X86::LEA64r)).add(Dest).add(Src);
1708 NewMI = addOffset(MIB, -Imm);
1709 break;
1710 }
1711
1712 case X86::VMOVDQU8Z128rmk:
1713 case X86::VMOVDQU8Z256rmk:
1714 case X86::VMOVDQU8Zrmk:
1715 case X86::VMOVDQU16Z128rmk:
1716 case X86::VMOVDQU16Z256rmk:
1717 case X86::VMOVDQU16Zrmk:
1718 case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
1719 case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
1720 case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
1721 case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
1722 case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
1723 case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
1724 case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
1725 case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
1726 case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
1727 case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
1728 case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
1729 case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
1730 case X86::VBROADCASTSDZ256rmk:
1731 case X86::VBROADCASTSDZrmk:
1732 case X86::VBROADCASTSSZ128rmk:
1733 case X86::VBROADCASTSSZ256rmk:
1734 case X86::VBROADCASTSSZrmk:
1735 case X86::VPBROADCASTDZ128rmk:
1736 case X86::VPBROADCASTDZ256rmk:
1737 case X86::VPBROADCASTDZrmk:
1738 case X86::VPBROADCASTQZ128rmk:
1739 case X86::VPBROADCASTQZ256rmk:
1740 case X86::VPBROADCASTQZrmk: {
1741 unsigned Opc;
1742 switch (MIOpc) {
1743 default: llvm_unreachable("Unreachable!");
1744 case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
1745 case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
1746 case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
1747 case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
1748 case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
1749 case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
1750 case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
1751 case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
1752 case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
1753 case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
1754 case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
1755 case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
1756 case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
1757 case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
1758 case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
1759 case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
1760 case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
1761 case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
1762 case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
1763 case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
1764 case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
1765 case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
1766 case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
1767 case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
1768 case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
1769 case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
1770 case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
1771 case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
1772 case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
1773 case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
1774 case X86::VBROADCASTSDZ256rmk: Opc = X86::VBLENDMPDZ256rmbk; break;
1775 case X86::VBROADCASTSDZrmk: Opc = X86::VBLENDMPDZrmbk; break;
1776 case X86::VBROADCASTSSZ128rmk: Opc = X86::VBLENDMPSZ128rmbk; break;
1777 case X86::VBROADCASTSSZ256rmk: Opc = X86::VBLENDMPSZ256rmbk; break;
1778 case X86::VBROADCASTSSZrmk: Opc = X86::VBLENDMPSZrmbk; break;
1779 case X86::VPBROADCASTDZ128rmk: Opc = X86::VPBLENDMDZ128rmbk; break;
1780 case X86::VPBROADCASTDZ256rmk: Opc = X86::VPBLENDMDZ256rmbk; break;
1781 case X86::VPBROADCASTDZrmk: Opc = X86::VPBLENDMDZrmbk; break;
1782 case X86::VPBROADCASTQZ128rmk: Opc = X86::VPBLENDMQZ128rmbk; break;
1783 case X86::VPBROADCASTQZ256rmk: Opc = X86::VPBLENDMQZ256rmbk; break;
1784 case X86::VPBROADCASTQZrmk: Opc = X86::VPBLENDMQZrmbk; break;
1785 }
1786
1787 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1788 .add(Dest)
1789 .add(MI.getOperand(2))
1790 .add(Src)
1791 .add(MI.getOperand(3))
1792 .add(MI.getOperand(4))
1793 .add(MI.getOperand(5))
1794 .add(MI.getOperand(6))
1795 .add(MI.getOperand(7));
1796 break;
1797 }
1798
1799 case X86::VMOVDQU8Z128rrk:
1800 case X86::VMOVDQU8Z256rrk:
1801 case X86::VMOVDQU8Zrrk:
1802 case X86::VMOVDQU16Z128rrk:
1803 case X86::VMOVDQU16Z256rrk:
1804 case X86::VMOVDQU16Zrrk:
1805 case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
1806 case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
1807 case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
1808 case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
1809 case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
1810 case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
1811 case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
1812 case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
1813 case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
1814 case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
1815 case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
1816 case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
1817 unsigned Opc;
1818 switch (MIOpc) {
1819 default: llvm_unreachable("Unreachable!");
1820 case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
1821 case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
1822 case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
1823 case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
1824 case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
1825 case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
1826 case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
1827 case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
1828 case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
1829 case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
1830 case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
1831 case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
1832 case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
1833 case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
1834 case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
1835 case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
1836 case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
1837 case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
1838 case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
1839 case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
1840 case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
1841 case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
1842 case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
1843 case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
1844 case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
1845 case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
1846 case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
1847 case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
1848 case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
1849 case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
1850 }
1851
1852 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1853 .add(Dest)
1854 .add(MI.getOperand(2))
1855 .add(Src)
1856 .add(MI.getOperand(3));
1857 break;
1858 }
1859 }
1860
1861 if (!NewMI) return nullptr;
1862
1863 if (LV) { // Update live variables
1864 if (Src.isKill())
1865 LV->replaceKillInstruction(Src.getReg(), MI, *NewMI);
1866 if (Dest.isDead())
1867 LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
1868 }
1869
1870 MachineBasicBlock &MBB = *MI.getParent();
1871 MBB.insert(MI.getIterator(), NewMI); // Insert the new inst
1872
1873 if (LIS) {
1874 LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
1875 if (SrcReg)
1876 LIS->getInterval(SrcReg);
1877 if (SrcReg2)
1878 LIS->getInterval(SrcReg2);
1879 }
1880
1881 return NewMI;
1882}
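
Each of the cases above rewrites a two-address SHL/INC/DEC/ADD/SUB into an LEA, whose base + index*scale + disp address form can express the same arithmetic without tying the destination register to a source. A minimal standalone sketch of that equivalence follows; it is not part of the listing and not LLVM code, and the lea helper name is purely illustrative.

#include <cstdint>
#include <cstdio>

// LEA's effective-address form: Base + Index * Scale + Disp.
static uint64_t lea(uint64_t Base, uint64_t Index, unsigned Scale, int64_t Disp) {
  return Base + Index * Scale + Disp;
}

int main() {
  uint64_t R = 5;
  std::printf("%llu\n", (unsigned long long)(R << 3));        // SHL by 3      -> 40
  std::printf("%llu\n", (unsigned long long)lea(0, R, 8, 0)); // LEA (,R,8)    -> 40
  std::printf("%llu\n", (unsigned long long)lea(R, R, 1, 0)); // ADD R,R  as LEA (R,R)
  std::printf("%llu\n", (unsigned long long)lea(R, 0, 1, 7)); // ADD $7   as LEA 7(R)
  std::printf("%llu\n", (unsigned long long)lea(R, 0, 1, 1)); // INC      as LEA 1(R)
}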
1883
1884 /// This determines which of three possible cases of a three-source commute
1885 /// the source indexes correspond to, taking into account any mask operands.
1886 /// All cases prevent commuting the pass-through operand. Returns -1 if the
1887 /// commute isn't possible.
1888/// Case 0 - Possible to commute the first and second operands.
1889/// Case 1 - Possible to commute the first and third operands.
1890/// Case 2 - Possible to commute the second and third operands.
1891static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
1892 unsigned SrcOpIdx2) {
1893 // Put the lowest index to SrcOpIdx1 to simplify the checks below.
1894 if (SrcOpIdx1 > SrcOpIdx2)
1895 std::swap(SrcOpIdx1, SrcOpIdx2);
1896
1897 unsigned Op1 = 1, Op2 = 2, Op3 = 3;
1898 if (X86II::isKMasked(TSFlags)) {
1899 Op2++;
1900 Op3++;
1901 }
1902
1903 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
1904 return 0;
1905 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
1906 return 1;
1907 if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
1908 return 2;
1909 llvm_unreachable("Unknown three src commute case.");
1910}
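
A minimal standalone sketch of the case numbering above, assuming the same operand layout (operands 1..3 are the sources, and a k-mask at operand 2 pushes the last two sources up by one slot). The function below is illustrative only, not the LLVM implementation.

#include <algorithm>
#include <cassert>
#include <cstdio>

// Returns 0, 1 or 2 for the commutable operand pairs (1,2), (1,3) and (2,3),
// shifted by one slot when a k-mask sits at operand 2.
static unsigned threeSrcCommuteCase(unsigned Idx1, unsigned Idx2, bool KMasked) {
  if (Idx1 > Idx2)
    std::swap(Idx1, Idx2);            // put the lower index first
  unsigned Op1 = 1, Op2 = 2, Op3 = 3;
  if (KMasked) { ++Op2; ++Op3; }
  if (Idx1 == Op1 && Idx2 == Op2) return 0;
  if (Idx1 == Op1 && Idx2 == Op3) return 1;
  if (Idx1 == Op2 && Idx2 == Op3) return 2;
  assert(false && "not a valid three-source commute");
  return ~0u;
}

int main() {
  std::printf("%u\n", threeSrcCommuteCase(2, 1, /*KMasked=*/false)); // 0
  std::printf("%u\n", threeSrcCommuteCase(1, 4, /*KMasked=*/true));  // 1
  std::printf("%u\n", threeSrcCommuteCase(4, 3, /*KMasked=*/true));  // 2
}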
1911
1912unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
1913 const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
1914 const X86InstrFMA3Group &FMA3Group) const {
1915
1916 unsigned Opc = MI.getOpcode();
1917
1918 // TODO: Commuting the 1st operand of FMA*_Int requires some additional
1919 // analysis. The commute optimization is legal only if all users of FMA*_Int
1920 // use only the lowest element of the FMA*_Int instruction. Such an analysis is
1921 // not implemented yet, so just return 0 in that case.
1922 // When such an analysis becomes available, this will be the right place to
1923 // call it.
1924 assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
1925        "Intrinsic instructions can't commute operand 1");
  [7] '?' condition is true
1926
1927 // Determine which case this commute is or if it can't be done.
1928 unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
1929 SrcOpIdx2);
1930 assert(Case < 3 && "Unexpected case number!");
  [8] '?' condition is true
1931
1932 // Define the FMA forms mapping array that helps to map input FMA form
1933 // to output FMA form to preserve the operation semantics after
1934 // commuting the operands.
1935 const unsigned Form132Index = 0;
1936 const unsigned Form213Index = 1;
1937 const unsigned Form231Index = 2;
1938 static const unsigned FormMapping[][3] = {
1939 // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
1940 // FMA132 A, C, b; ==> FMA231 C, A, b;
1941 // FMA213 B, A, c; ==> FMA213 A, B, c;
1942 // FMA231 C, A, b; ==> FMA132 A, C, b;
1943 { Form231Index, Form213Index, Form132Index },
1944 // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
1945 // FMA132 A, c, B; ==> FMA132 B, c, A;
1946 // FMA213 B, a, C; ==> FMA231 C, a, B;
1947 // FMA231 C, a, B; ==> FMA213 B, a, C;
1948 { Form132Index, Form231Index, Form213Index },
1949 // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
1950 // FMA132 a, C, B; ==> FMA213 a, B, C;
1951 // FMA213 b, A, C; ==> FMA132 b, C, A;
1952 // FMA231 c, A, B; ==> FMA231 c, B, A;
1953 { Form213Index, Form132Index, Form231Index }
1954 };
1955
1956 unsigned FMAForms[3];
1957 FMAForms[0] = FMA3Group.get132Opcode();
1958 FMAForms[1] = FMA3Group.get213Opcode();
1959 FMAForms[2] = FMA3Group.get231Opcode();
1960 unsigned FormIndex;
1961 for (FormIndex = 0; FormIndex < 3; FormIndex++)
  [9] Loop condition is true. Entering loop body
  [12] Loop condition is true. Entering loop body
  [15] Loop condition is true. Entering loop body
  [18] The value 3 is assigned to 'FormIndex'
  [19] Loop condition is false. Execution continues on line 1966
1962 if (Opc == FMAForms[FormIndex])
  [10] Assuming the condition is false
  [11] Taking false branch
  [13] Assuming the condition is false
  [14] Taking false branch
  [16] Assuming the condition is false
  [17] Taking false branch
1963 break;
1964
1965 // Everything is ready, just adjust the FMA opcode and return it.
1966 FormIndex = FormMapping[Case][FormIndex];
  [20] Assigned value is garbage or undefined
1967 return FMAForms[FormIndex];
1968}
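
The "garbage or undefined" warning above corresponds to the path where Opc matches none of the three opcodes in FMAForms, so the loop leaves FormIndex at 3 and the FormMapping[Case][FormIndex] lookup reads past the end of the row. Below is a standalone sketch of that lookup pattern with one possible guard; the early return 0 is purely hypothetical and is not necessarily the fix used upstream.

#include <cstdio>

// Same lookup shape as above, plus a hypothetical guard for the
// "opcode not found" path that the analyzer is reporting.
static unsigned commuteFMAForm(unsigned Opc, unsigned Case,
                               const unsigned (&FMAForms)[3]) {
  static const unsigned FormMapping[3][3] = {
      {2, 1, 0}, // case 0: 132 <-> 231, 213 stays 213
      {0, 2, 1}, // case 1: 132 stays 132, 213 <-> 231
      {1, 0, 2}, // case 2: 132 <-> 213, 231 stays 231
  };
  unsigned FormIndex;
  for (FormIndex = 0; FormIndex < 3; ++FormIndex)
    if (Opc == FMAForms[FormIndex])
      break;
  if (FormIndex == 3) // hypothetical guard: Opc is not one of the three forms
    return 0;
  return FMAForms[FormMapping[Case][FormIndex]];
}

int main() {
  const unsigned Forms[3] = {132, 213, 231}; // stand-ins for real opcodes
  std::printf("%u\n", commuteFMAForm(132, 0, Forms)); // prints 231
  std::printf("%u\n", commuteFMAForm(999, 0, Forms)); // guard fires, prints 0
}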
1969
1970static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
1971 unsigned SrcOpIdx2) {
1972 // Determine which case this commute is or if it can't be done.
1973 unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
1974 SrcOpIdx2);
1975 assert(Case < 3 && "Unexpected case value!");
1976
1977 // For each case we need to swap two pairs of bits in the final immediate.
1978 static const uint8_t SwapMasks[3][4] = {
1979 { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
1980 { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
1981 { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
1982 };
1983
1984 uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
1985 // Clear out the bits we are swapping.
1986 uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
1987 SwapMasks[Case][2] | SwapMasks[Case][3]);
1988 // If the immediate had a bit of the pair set, then set the opposite bit.
1989 if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
1990 if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
1991 if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
1992 if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
1993 MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
1994}
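
A standalone sketch of the immediate rewrite performed by commuteVPTERNLOG, under the usual VPTERNLOG convention that truth-table index bits 2/1/0 come from sources 1/2/3; the helper below is illustrative, not LLVM code.

#include <cstdint>
#include <cstdio>

static uint8_t swapTernlogImm(uint8_t Imm, unsigned Case) {
  // Same masks as above: each case swaps the two pairs of truth-table bits
  // whose input patterns differ only in the two commuted sources.
  static const uint8_t SwapMasks[3][4] = {
      {0x04, 0x10, 0x08, 0x20}, // case 0: swap bits 2/4 and 3/5
      {0x02, 0x10, 0x08, 0x40}, // case 1: swap bits 1/4 and 3/6
      {0x02, 0x04, 0x20, 0x40}, // case 2: swap bits 1/2 and 5/6
  };
  uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
                           SwapMasks[Case][2] | SwapMasks[Case][3]);
  if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
  if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
  if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
  if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
  return NewImm;
}

int main() {
  // 0xCA encodes "A ? B : C"; commuting the last two sources (case 2)
  // should give "A ? C : B", which is 0xAC.
  std::printf("0x%02X\n", swapTernlogImm(0xCA, 2)); // prints 0xAC
}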
1995
1996// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
1997// commuted.
1998static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
1999#define VPERM_CASES(Suffix) \
2000 case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
2001 case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
2002 case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
2003 case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
2004 case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
2005 case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
2006 case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
2007 case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
2008 case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
2009 case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
2010 case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
2011 case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
2012
2013#define VPERM_CASES_BROADCAST(Suffix) \
2014 VPERM_CASES(Suffix) \
2015 case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
2016 case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
2017 case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
2018 case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
2019 case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
2020 case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
2021
2022 switch (Opcode) {
2023 default: return false;
2024 VPERM_CASES(B)
2025 VPERM_CASES_BROADCAST(D)
2026 VPERM_CASES_BROADCAST(PD)
2027 VPERM_CASES_BROADCAST(PS)
2028 VPERM_CASES_BROADCAST(Q)
2029 VPERM_CASES(W)
2030 return true;
2031 }
2032#undef VPERM_CASES_BROADCAST
2033#undef VPERM_CASES
2034}
2035
2036// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
2037// from the I opcode to the T opcode and vice versa.
2038static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
2039#define VPERM_CASES(Orig, New) \
2040 case X86::Orig##128rr: return X86::New##128rr; \
2041 case X86::Orig##128rrkz: return X86::New##128rrkz; \
2042 case X86::Orig##128rm: return X86::New##128rm; \
2043 case X86::Orig##128rmkz: return X86::New##128rmkz; \
2044 case X86::Orig##256rr: return X86::New##256rr; \
2045 case X86::Orig##256rrkz: return X86::New##256rrkz; \
2046 case X86::Orig##256rm: return X86::New##256rm; \
2047 case X86::Orig##256rmkz: return X86::New##256rmkz; \
2048 case X86::Orig##rr: return X86::New##rr; \
2049 case X86::Orig##rrkz: return X86::New##rrkz; \
2050 case X86::Orig##rm: return X86::New##rm; \
2051 case X86::Orig##rmkz: return X86::New##rmkz;
2052
2053#define VPERM_CASES_BROADCAST(Orig, New) \
2054 VPERM_CASES(Orig, New) \
2055 case X86::Orig##128rmb: return X86::New##128rmb; \
2056 case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
2057 case X86::Orig##256rmb: return X86::New##256rmb; \
2058 case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
2059 case X86::Orig##rmb: return X86::New##rmb; \
2060 case X86::Orig##rmbkz: return X86::New##rmbkz;
2061
2062 switch (Opcode) {
2063 VPERM_CASES(VPERMI2B, VPERMT2B)
2064 VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
2065 VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
2066 VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
2067 VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
2068 VPERM_CASES(VPERMI2W, VPERMT2W)
2069 VPERM_CASES(VPERMT2B, VPERMI2B)
2070 VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
2071 VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
2072 VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
2073 VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
2074 VPERM_CASES(VPERMT2W, VPERMI2W)
2075 }
2076
2077 llvm_unreachable("Unreachable!");
2078#undef VPERM_CASES_BROADCAST
2079#undef VPERM_CASES
2080}
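
A standalone sketch of why the VPERMI2*/VPERMT2* forms commute into each other: both select elements from a two-register table using an index vector, and they differ only in which input the result overwrites (the indices for the I form, the first table register for the T form). The 4-element model below is illustrative only.

#include <array>
#include <cstdio>

using V4 = std::array<unsigned, 4>;

// Common selection done by both forms: pick element Idx[i] from the
// eight-element table formed by A followed by B.
static V4 perm2(const V4 &Idx, const V4 &A, const V4 &B) {
  V4 R{};
  for (unsigned i = 0; i < 4; ++i) {
    unsigned k = Idx[i] & 7;
    R[i] = k < 4 ? A[k] : B[k - 4];
  }
  return R;
}

int main() {
  V4 Idx{0, 5, 2, 7}, A{10, 11, 12, 13}, B{20, 21, 22, 23};
  V4 R = perm2(Idx, A, B); // VPERMI2 writes R over Idx; VPERMT2 writes it over A
  std::printf("%u %u %u %u\n", R[0], R[1], R[2], R[3]); // 10 21 12 23
}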
2081
2082MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2083 unsigned OpIdx1,
2084 unsigned OpIdx2) const {
2085 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
2086 if (NewMI)
2087 return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
2088 return MI;
2089 };
2090
2091 switch (MI.getOpcode()) {
  [1] Control jumps to the 'default' case at line 2408
2092 case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
2093 case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
2094 case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
2095 case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
2096 case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
2097 case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
2098 unsigned Opc;
2099 unsigned Size;
2100 switch (MI.getOpcode()) {
2101 default: llvm_unreachable("Unreachable!");
2102 case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
2103 case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
2104 case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
2105 case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
2106 case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
2107 case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
2108 }
2109 unsigned Amt = MI.getOperand(3).getImm();
2110 auto &WorkingMI = cloneIfNew(MI);
2111 WorkingMI.setDesc(get(Opc));
2112 WorkingMI.getOperand(3).setImm(Size - Amt);
2113 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2114 OpIdx1, OpIdx2);
2115 }
2116 case X86::PFSUBrr:
2117 case X86::PFSUBRrr: {
2118 // PFSUB x, y: x = x - y
2119 // PFSUBR x, y: x = y - x
2120 unsigned Opc =
2121 (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
2122 auto &WorkingMI = cloneIfNew(MI);
2123 WorkingMI.setDesc(get(Opc));
2124 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2125 OpIdx1, OpIdx2);
2126 }
2127 case X86::BLENDPDrri:
2128 case X86::BLENDPSrri:
2129 case X86::VBLENDPDrri:
2130 case X86::VBLENDPSrri:
2131 // If we're optimizing for size, try to use MOVSD/MOVSS.
2132 if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
2133 unsigned Mask, Opc;
2134 switch (MI.getOpcode()) {
2135 default: llvm_unreachable("Unreachable!");
2136 case X86::BLENDPDrri: Opc = X86::MOVSDrr; Mask = 0x03; break;
2137 case X86::BLENDPSrri: Opc = X86::MOVSSrr; Mask = 0x0F; break;
2138 case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
2139 case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
2140 }
2141 if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
2142 auto &WorkingMI = cloneIfNew(MI);
2143 WorkingMI.setDesc(get(Opc));
2144 WorkingMI.RemoveOperand(3);
2145 return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
2146 /*NewMI=*/false,
2147 OpIdx1, OpIdx2);
2148 }
2149 }
2150 LLVM_FALLTHROUGH;
2151 case X86::PBLENDWrri:
2152 case X86::VBLENDPDYrri:
2153 case X86::VBLENDPSYrri:
2154 case X86::VPBLENDDrri:
2155 case X86::VPBLENDWrri:
2156 case X86::VPBLENDDYrri:
2157 case X86::VPBLENDWYrri:{
2158 int8_t Mask;
2159 switch (MI.getOpcode()) {
2160 default: llvm_unreachable("Unreachable!");
2161 case X86::BLENDPDrri: Mask = (int8_t)0x03; break;
2162 case X86::BLENDPSrri: Mask = (int8_t)0x0F; break;
2163 case X86::PBLENDWrri: Mask = (int8_t)0xFF; break;
2164 case X86::VBLENDPDrri: Mask = (int8_t)0x03; break;
2165 case X86::VBLENDPSrri: Mask = (int8_t)0x0F; break;
2166 case X86::VBLENDPDYrri: Mask = (int8_t)0x0F; break;
2167 case X86::VBLENDPSYrri: Mask = (int8_t)0xFF; break;
2168 case X86::VPBLENDDrri: Mask = (int8_t)0x0F; break;
2169 case X86::VPBLENDWrri: Mask = (int8_t)0xFF; break;
2170 case X86::VPBLENDDYrri: Mask = (int8_t)0xFF; break;
2171 case X86::VPBLENDWYrri: Mask = (int8_t)0xFF; break;
2172 }
2173 // Only the least significant bits of Imm are used.
2174 // Using int8_t to ensure it will be sign extended to the int64_t that
2175 // setImm takes in order to match isel behavior.
2176 int8_t Imm = MI.getOperand(3).getImm() & Mask;
2177 auto &WorkingMI = cloneIfNew(MI);
2178 WorkingMI.getOperand(3).setImm(Mask ^ Imm);
2179 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2180 OpIdx1, OpIdx2);
2181 }
2182 case X86::INSERTPSrr:
2183 case X86::VINSERTPSrr:
2184 case X86::VINSERTPSZrr: {
2185 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
2186 unsigned ZMask = Imm & 15;
2187 unsigned DstIdx = (Imm >> 4) & 3;
2188 unsigned SrcIdx = (Imm >> 6) & 3;
2189
2190 // We can commute insertps if we zero 2 of the elements, the insertion is
2191 // "inline", and we don't override the insertion with a zero.
2192 if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
2193 countPopulation(ZMask) == 2) {
2194 unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15);
2195 assert(AltIdx < 4 && "Illegal insertion index");
2196 unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
2197 auto &WorkingMI = cloneIfNew(MI);
2198 WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm);
2199 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2200 OpIdx1, OpIdx2);
2201 }
2202 return nullptr;
2203 }
2204 case X86::MOVSDrr:
2205 case X86::MOVSSrr:
2206 case X86::VMOVSDrr:
2207 case X86::VMOVSSrr:{
2208 // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
2209 if (Subtarget.hasSSE41()) {
2210 unsigned Mask, Opc;
2211 switch (MI.getOpcode()) {
2212 default: llvm_unreachable("Unreachable!");
2213 case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
2214 case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
2215 case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
2216 case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
2217 }
2218
2219 auto &WorkingMI = cloneIfNew(MI);
2220 WorkingMI.setDesc(get(Opc));
2221 WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
2222 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2223 OpIdx1, OpIdx2);
2224 }
2225
2226 // Convert to SHUFPD.
2227 assert(MI.getOpcode() == X86::MOVSDrr &&
2228        "Can only commute MOVSDrr without SSE4.1");
2229
2230 auto &WorkingMI = cloneIfNew(MI);
2231 WorkingMI.setDesc(get(X86::SHUFPDrri));
2232 WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
2233 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2234 OpIdx1, OpIdx2);
2235 }
2236 case X86::SHUFPDrri: {
2237 // Commute to MOVSD.
2238 assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
2239 auto &WorkingMI = cloneIfNew(MI);
2240 WorkingMI.setDesc(get(X86::MOVSDrr));
2241 WorkingMI.RemoveOperand(3);
2242 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2243 OpIdx1, OpIdx2);
2244 }
2245 case X86::PCLMULQDQrr:
2246 case X86::VPCLMULQDQrr:
2247 case X86::VPCLMULQDQYrr:
2248 case X86::VPCLMULQDQZrr:
2249 case X86::VPCLMULQDQZ128rr:
2250 case X86::VPCLMULQDQZ256rr: {
2251 // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
2252 // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
2253 unsigned Imm = MI.getOperand(3).getImm();
2254 unsigned Src1Hi = Imm & 0x01;
2255 unsigned Src2Hi = Imm & 0x10;
2256 auto &WorkingMI = cloneIfNew(MI);
2257 WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
2258 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2259 OpIdx1, OpIdx2);
2260 }
2261 case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
2262 case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
2263 case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
2264 case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
2265 case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
2266 case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
2267 case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
2268 case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
2269 case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
2270 case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
2271 case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
2272 case X86::VPCMPWZrri: case X86::VPCMPUWZrri:
2273 case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
2274 case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
2275 case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik:
2276 case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
2277 case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
2278 case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik:
2279 case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
2280 case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
2281 case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik:
2282 case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
2283 case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
2284 case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: {
2285 // Flip comparison mode immediate (if necessary).
2286 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
2287 Imm = X86::getSwappedVPCMPImm(Imm);
2288 auto &WorkingMI = cloneIfNew(MI);
2289 WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
2290 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2291 OpIdx1, OpIdx2);
2292 }
2293 case X86::VPCOMBri: case X86::VPCOMUBri:
2294 case X86::VPCOMDri: case X86::VPCOMUDri:
2295 case X86::VPCOMQri: case X86::VPCOMUQri:
2296 case X86::VPCOMWri: case X86::VPCOMUWri: {
2297 // Flip comparison mode immediate (if necessary).
2298 unsigned Imm = MI.getOperand(3).getImm() & 0x7;
2299 Imm = X86::getSwappedVPCOMImm(Imm);
2300 auto &WorkingMI = cloneIfNew(MI);
2301 WorkingMI.getOperand(3).setImm(Imm);
2302 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2303 OpIdx1, OpIdx2);
2304 }
2305 case X86::VCMPSDZrr:
2306 case X86::VCMPSSZrr:
2307 case X86::VCMPPDZrri:
2308 case X86::VCMPPSZrri:
2309 case X86::VCMPSHZrr:
2310 case X86::VCMPPHZrri:
2311 case X86::VCMPPHZ128rri:
2312 case X86::VCMPPHZ256rri:
2313 case X86::VCMPPDZ128rri:
2314 case X86::VCMPPSZ128rri:
2315 case X86::VCMPPDZ256rri:
2316 case X86::VCMPPSZ256rri:
2317 case X86::VCMPPDZrrik:
2318 case X86::VCMPPSZrrik:
2319 case X86::VCMPPDZ128rrik:
2320 case X86::VCMPPSZ128rrik:
2321 case X86::VCMPPDZ256rrik:
2322 case X86::VCMPPSZ256rrik: {
2323 unsigned Imm =
2324 MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
2325 Imm = X86::getSwappedVCMPImm(Imm);
2326 auto &WorkingMI = cloneIfNew(MI);
2327 WorkingMI.getOperand(MI.getNumExplicitOperands() - 1).setImm(Imm);
2328 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2329 OpIdx1, OpIdx2);
2330 }
2331 case X86::VPERM2F128rr:
2332 case X86::VPERM2I128rr: {
2333 // Flip permute source immediate.
2334 // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
2335 // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
2336 int8_t Imm = MI.getOperand(3).getImm() & 0xFF;
2337 auto &WorkingMI = cloneIfNew(MI);
2338 WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
2339 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2340 OpIdx1, OpIdx2);
2341 }
2342 case X86::MOVHLPSrr:
2343 case X86::UNPCKHPDrr:
2344 case X86::VMOVHLPSrr:
2345 case X86::VUNPCKHPDrr:
2346 case X86::VMOVHLPSZrr:
2347 case X86::VUNPCKHPDZ128rr: {
2348 assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");
2349
2350 unsigned Opc = MI.getOpcode();
2351 switch (Opc) {
2352 default: llvm_unreachable("Unreachable!");
2353 case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
2354 case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
2355 case X86::VMOVHLPSrr: Opc = X86::VUNPCKHPDrr; break;
2356 case X86::VUNPCKHPDrr: Opc = X86::VMOVHLPSrr; break;
2357 case X86::VMOVHLPSZrr: Opc = X86::VUNPCKHPDZ128rr; break;
2358 case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr; break;
2359 }
2360 auto &WorkingMI = cloneIfNew(MI);
2361 WorkingMI.setDesc(get(Opc));
2362 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2363 OpIdx1, OpIdx2);
2364 }
2365 case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: {
2366 auto &WorkingMI = cloneIfNew(MI);
2367 unsigned OpNo = MI.getDesc().getNumOperands() - 1;
2368 X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
2369 WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
2370 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2371 OpIdx1, OpIdx2);
2372 }
2373 case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
2374 case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
2375 case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
2376 case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
2377 case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
2378 case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
2379 case X86::VPTERNLOGDZrrik:
2380 case X86::VPTERNLOGDZ128rrik:
2381 case X86::VPTERNLOGDZ256rrik:
2382 case X86::VPTERNLOGQZrrik:
2383 case X86::VPTERNLOGQZ128rrik:
2384 case X86::VPTERNLOGQZ256rrik:
2385 case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
2386 case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
2387 case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
2388 case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
2389 case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
2390 case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
2391 case X86::VPTERNLOGDZ128rmbi:
2392 case X86::VPTERNLOGDZ256rmbi:
2393 case X86::VPTERNLOGDZrmbi:
2394 case X86::VPTERNLOGQZ128rmbi:
2395 case X86::VPTERNLOGQZ256rmbi:
2396 case X86::VPTERNLOGQZrmbi:
2397 case X86::VPTERNLOGDZ128rmbikz:
2398 case X86::VPTERNLOGDZ256rmbikz:
2399 case X86::VPTERNLOGDZrmbikz:
2400 case X86::VPTERNLOGQZ128rmbikz:
2401 case X86::VPTERNLOGQZ256rmbikz:
2402 case X86::VPTERNLOGQZrmbikz: {
2403 auto &WorkingMI = cloneIfNew(MI);
2404 commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2);
2405 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2406 OpIdx1, OpIdx2);
2407 }
2408 default: {
2409 if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
  [2] Assuming the condition is false
  [3] Taking false branch
2410 unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
2411 auto &WorkingMI = cloneIfNew(MI);
2412 WorkingMI.setDesc(get(Opc));
2413 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2414 OpIdx1, OpIdx2);
2415 }
2416
2417 const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
2418 MI.getDesc().TSFlags);
2419 if (FMA3Group) {
  [4] Assuming 'FMA3Group' is non-null
  [5] Taking true branch
2420 unsigned Opc =
2421 getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
  [6] Calling 'X86InstrInfo::getFMA3OpcodeToCommuteOperands'
2422 auto &WorkingMI = cloneIfNew(MI);
2423 WorkingMI.setDesc(get(Opc));
2424 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2425 OpIdx1, OpIdx2);
2426 }
2427
2428 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2429 }
2430 }
2431}
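
Several cases above commute an instruction purely by rewriting its immediate. For the BLENDP*/PBLENDW family, bit i of the immediate selects the second source for lane i, so swapping the sources is an XOR with the instruction's all-lanes mask. A minimal standalone sketch (illustrative helper names, not LLVM code):

#include <cstdio>

// Bit i of a blend immediate selects the second source for lane i, so a
// source swap is an XOR against the instruction's all-lanes mask.
static unsigned commuteBlendImm(unsigned Imm, unsigned LaneMask) {
  return LaneMask ^ (Imm & LaneMask);
}

int main() {
  // A 4-lane blend (e.g. BLENDPS, mask 0xF) taking lanes {0,2} from source 2
  // takes lanes {1,3} from source 2 once the sources are swapped.
  std::printf("0x%X\n", commuteBlendImm(0x5, 0xF)); // prints 0xA
}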
2432
2433bool
2434X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
2435 unsigned &SrcOpIdx1,
2436 unsigned &SrcOpIdx2,
2437 bool IsIntrinsic) const {
2438 uint64_t TSFlags = MI.getDesc().TSFlags;
2439
2440 unsigned FirstCommutableVecOp = 1;
2441 unsigned LastCommutableVecOp = 3;
2442 unsigned KMaskOp = -1U;
2443 if (X86II::isKMasked(TSFlags)) {
2444 // For k-zero-masked operations it is Ok to commute the first vector
2445 // operand, unless this is an intrinsic instruction.
2446 // For regular k-masked operations a conservative choice is done as the
2447 // elements of the first vector operand, for which the corresponding bit
2448 // in the k-mask operand is set to 0, are copied to the result of the
2449 // instruction.
2450 // TODO/FIXME: The commute still may be legal if it is known that the
2451 // k-mask operand is set to either all ones or all zeroes.
2452 // It is also Ok to commute the 1st operand if all users of MI use only
2453 // the elements enabled by the k-mask operand. For example,
2454 // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
2455 // : v1[i];
2456 // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
2457 // // Ok, to commute v1 in FMADD213PSZrk.
2458
2459 // The k-mask operand has index = 2 for masked and zero-masked operations.
2460 KMaskOp = 2;
2461
2462 // The operand with index = 1 is used as a source for those elements for
2463 // which the corresponding bit in the k-mask is set to 0.
2464 if (X86II::isKMergeMasked(TSFlags) || IsIntrinsic)
2465 FirstCommutableVecOp = 3;
2466
2467 LastCommutableVecOp++;
2468 } else if (IsIntrinsic) {
2469 // Commuting the first operand of an intrinsic instruction isn't possible
2470 // unless we can prove that only the lowest element of the result is used.
2471 FirstCommutableVecOp = 2;
2472 }
2473
2474 if (isMem(MI, LastCommutableVecOp))
2475 LastCommutableVecOp--;
2476
2477 // Only the first RegOpsNum operands are commutable.
2478 // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
2479 // that the operand is not specified/fixed.
2480 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2481 (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
2482 SrcOpIdx1 == KMaskOp))
2483 return false;
2484 if (SrcOpIdx2 != CommuteAnyOperandIndex &&
2485 (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
2486 SrcOpIdx2 == KMaskOp))
2487 return false;
2488
2489 // Look for two different register operands assumed to be commutable
2490 // regardless of the FMA opcode. The FMA opcode is adjusted later.
2491 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2492 SrcOpIdx2 == CommuteAnyOperandIndex) {
2493 unsigned CommutableOpIdx2 = SrcOpIdx2;
2494
2495 // At least one of the operands to be commuted is not specified and
2496 // this method is free to choose appropriate commutable operands.
2497 if (SrcOpIdx1 == SrcOpIdx2)
2498 // Neither operand is fixed. By default, set one of the commutable
2499 // operands to the last register operand of the instruction.
2500 CommutableOpIdx2 = LastCommutableVecOp;
2501 else if (SrcOpIdx2 == CommuteAnyOperandIndex)
2502 // Only one of the operands is not fixed.
2503 CommutableOpIdx2 = SrcOpIdx1;
2504
2505 // CommutableOpIdx2 is well defined now. Let's choose another commutable
2506 // operand and assign its index to CommutableOpIdx1.
2507 Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
2508
2509 unsigned CommutableOpIdx1;
2510 for (CommutableOpIdx1 = LastCommutableVecOp;
2511 CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
2512 // Just ignore and skip the k-mask operand.
2513 if (CommutableOpIdx1 == KMaskOp)
2514 continue;
2515
2516 // The commuted operands must have different registers.
2517 // Otherwise, the commute transformation does not change anything and
2518 // is therefore useless.
2519 if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
2520 break;
2521 }
2522
2523 // No appropriate commutable operands were found.
2524 if (CommutableOpIdx1 < FirstCommutableVecOp)
2525 return false;
2526
2527 // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2
2528 // to return those values.
2529 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2530 CommutableOpIdx1, CommutableOpIdx2))
2531 return false;
2532 }
2533
2534 return true;
2535}
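
A standalone sketch of the commutable-operand window computed above, paraphrasing the rules in the comments: the sources live at operands 1..3, a k-mask at operand 2 widens the window by one, merge-masking or an intrinsic form excludes the pass-through source, and a trailing memory operand is never commuted. Illustrative only, not LLVM code.

#include <cstdio>

struct CommuteWindow { unsigned First, Last, KMaskOp; };

// Paraphrase of the rules above for a three-source operation.
static CommuteWindow commutableRange(bool KMasked, bool MergeMasked,
                                     bool Intrinsic, bool LastOpIsMem) {
  CommuteWindow W{1, 3, ~0u};
  if (KMasked) {
    W.KMaskOp = 2;               // the k-mask sits at operand 2
    if (MergeMasked || Intrinsic)
      W.First = 3;               // never commute the pass-through source
    ++W.Last;                    // remaining sources are pushed up by one
  } else if (Intrinsic) {
    W.First = 2;                 // can't commute operand 1 of an intrinsic form
  }
  if (LastOpIsMem)
    --W.Last;                    // a memory operand cannot be commuted
  return W;
}

int main() {
  CommuteWindow W = commutableRange(/*KMasked=*/true, /*MergeMasked=*/true,
                                    /*Intrinsic=*/false, /*LastOpIsMem=*/false);
  std::printf("first=%u last=%u kmask=%u\n", W.First, W.Last, W.KMaskOp); // 3 4 2
}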
2536
2537bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2538 unsigned &SrcOpIdx1,
2539 unsigned &SrcOpIdx2) const {
2540 const MCInstrDesc &Desc = MI.getDesc();
2541 if (!Desc.isCommutable())
2542 return false;
2543
2544 switch (MI.getOpcode()) {
2545 case X86::CMPSDrr:
2546 case X86::CMPSSrr:
2547 case X86::CMPPDrri:
2548 case X86::CMPPSrri:
2549 case X86::VCMPSDrr:
2550 case X86::VCMPSSrr:
2551 case X86::VCMPPDrri:
2552 case X86::VCMPPSrri:
2553 case X86::VCMPPDYrri:
2554 case X86::VCMPPSYrri:
2555 case X86::VCMPSDZrr:
2556 case X86::VCMPSSZrr:
2557 case X86::VCMPPDZrri:
2558 case X86::VCMPPSZrri:
2559 case X86::VCMPSHZrr:
2560 case X86::VCMPPHZrri:
2561 case X86::VCMPPHZ128rri:
2562 case X86::VCMPPHZ256rri:
2563 case X86::VCMPPDZ128rri:
2564 case X86::VCMPPSZ128rri:
2565 case X86::VCMPPDZ256rri:
2566 case X86::VCMPPSZ256rri:
2567 case X86::VCMPPDZrrik:
2568 case X86::VCMPPSZrrik:
2569 case X86::VCMPPDZ128rrik:
2570 case X86::VCMPPSZ128rrik:
2571 case X86::VCMPPDZ256rrik:
2572 case X86::VCMPPSZ256rrik: {
2573 unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;
2574
2575 // Float comparison can be safely commuted for
2576 // Ordered/Unordered/Equal/NotEqual tests
2577 unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
2578 switch (Imm) {
2579 default:
2580 // EVEX versions can be commuted.
2581 if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
2582 break;
2583 return false;
2584 case 0x00: // EQUAL
2585 case 0x03: // UNORDERED
2586 case 0x04: // NOT EQUAL
2587 case 0x07: // ORDERED
2588 break;
2589 }
2590
2591 // The indices of the commutable operands are 1 and 2 (or 2 and 3
2592 // when masked).
2593 // Assign them to the returned operand indices here.
2594 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
2595 2 + OpOffset);
2596 }
2597 case X86::MOVSSrr:
2598 // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
2599 // form an SSE4.1 blend. We assume VMOVSSrr/VMOVSDrr are always commutable
2600 // since AVX implies SSE4.1.
2601 if (Subtarget.hasSSE41())
2602 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2603 return false;
2604 case X86::SHUFPDrri:
2605 // We can commute this to MOVSD.
2606 if (MI.getOperand(3).getImm() == 0x02)
2607 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2608 return false;
2609 case X86::MOVHLPSrr:
2610 case X86::UNPCKHPDrr:
2611 case X86::VMOVHLPSrr:
2612 case X86::VUNPCKHPDrr:
2613 case X86::VMOVHLPSZrr:
2614 case X86::VUNPCKHPDZ128rr:
2615 if (Subtarget.hasSSE2())
2616 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2617 return false;
2618 case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
2619 case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
2620 case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
2621 case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
2622 case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
2623 case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
2624 case X86::VPTERNLOGDZrrik:
2625 case X86::VPTERNLOGDZ128rrik:
2626 case X86::VPTERNLOGDZ256rrik:
2627 case X86::VPTERNLOGQZrrik:
2628 case X86::VPTERNLOGQZ128rrik:
2629 case X86::VPTERNLOGQZ256rrik:
2630 case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
2631 case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
2632 case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
2633 case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
2634 case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
2635 case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
2636 case X86::VPTERNLOGDZ128rmbi:
2637 case X86::VPTERNLOGDZ256rmbi:
2638 case X86::VPTERNLOGDZrmbi:
2639 case X86::VPTERNLOGQZ128rmbi:
2640 case X86::VPTERNLOGQZ256rmbi:
2641 case X86::VPTERNLOGQZrmbi:
2642 case X86::VPTERNLOGDZ128rmbikz:
2643 case X86::VPTERNLOGDZ256rmbikz:
2644 case X86::VPTERNLOGDZrmbikz:
2645 case X86::VPTERNLOGQZ128rmbikz:
2646 case X86::VPTERNLOGQZ256rmbikz:
2647 case X86::VPTERNLOGQZrmbikz:
2648 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2649 case X86::VPDPWSSDYrr:
2650 case X86::VPDPWSSDrr:
2651 case X86::VPDPWSSDSYrr:
2652 case X86::VPDPWSSDSrr:
2653 case X86::VPDPWSSDZ128r:
2654 case X86::VPDPWSSDZ128rk:
2655 case X86::VPDPWSSDZ128rkz:
2656 case X86::VPDPWSSDZ256r:
2657 case X86::VPDPWSSDZ256rk:
2658 case X86::VPDPWSSDZ256rkz:
2659 case X86::VPDPWSSDZr:
2660 case X86::VPDPWSSDZrk:
2661 case X86::VPDPWSSDZrkz:
2662 case X86::VPDPWSSDSZ128r:
2663 case X86::VPDPWSSDSZ128rk:
2664 case X86::VPDPWSSDSZ128rkz:
2665 case X86::VPDPWSSDSZ256r:
2666 case X86::VPDPWSSDSZ256rk:
2667 case X86::VPDPWSSDSZ256rkz:
2668 case X86::VPDPWSSDSZr:
2669 case X86::VPDPWSSDSZrk:
2670 case X86::VPDPWSSDSZrkz:
2671 case X86::VPMADD52HUQZ128r:
2672 case X86::VPMADD52HUQZ128rk:
2673 case X86::VPMADD52HUQZ128rkz:
2674 case X86::VPMADD52HUQZ256r:
2675 case X86::VPMADD52HUQZ256rk:
2676 case X86::VPMADD52HUQZ256rkz:
2677 case X86::VPMADD52HUQZr:
2678 case X86::VPMADD52HUQZrk:
2679 case X86::VPMADD52HUQZrkz:
2680 case X86::VPMADD52LUQZ128r:
2681 case X86::VPMADD52LUQZ128rk:
2682 case X86::VPMADD52LUQZ128rkz:
2683 case X86::VPMADD52LUQZ256r:
2684 case X86::VPMADD52LUQZ256rk:
2685 case X86::VPMADD52LUQZ256rkz:
2686 case X86::VPMADD52LUQZr:
2687 case X86::VPMADD52LUQZrk:
2688 case X86::VPMADD52LUQZrkz:
2689 case X86::VFMADDCPHZr:
2690 case X86::VFMADDCPHZrk:
2691 case X86::VFMADDCPHZrkz:
2692 case X86::VFMADDCPHZ128r:
2693 case X86::VFMADDCPHZ128rk:
2694 case X86::VFMADDCPHZ128rkz:
2695 case X86::VFMADDCPHZ256r:
2696 case X86::VFMADDCPHZ256rk:
2697 case X86::VFMADDCPHZ256rkz:
2698 case X86::VFMADDCSHZr:
2699 case X86::VFMADDCSHZrk:
2700 case X86::VFMADDCSHZrkz: {
2701 unsigned CommutableOpIdx1 = 2;
2702 unsigned CommutableOpIdx2 = 3;
2703 if (X86II::isKMasked(Desc.TSFlags)) {
2704 // Skip the mask register.
2705 ++CommutableOpIdx1;
2706 ++CommutableOpIdx2;
2707 }
2708 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2709 CommutableOpIdx1, CommutableOpIdx2))
2710 return false;
2711 if (!MI.getOperand(SrcOpIdx1).isReg() ||
2712 !MI.getOperand(SrcOpIdx2).isReg())
2713 // No idea.
2714 return false;
2715 return true;
2716 }
2717
2718 default:
2719 const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
2720 MI.getDesc().TSFlags);
2721 if (FMA3Group)
2722 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
2723 FMA3Group->isIntrinsic());
2724
2725 // Handle masked instructions since we need to skip over the mask input
2726 // and the preserved input.
2727 if (X86II::isKMasked(Desc.TSFlags)) {
2728 // First assume that the first input is the mask operand and skip past it.
2729 unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
2730 unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
2731 // Check if the first input is tied. If there isn't one, then we only
2732 // need to skip the mask operand, which we did above.
2733 if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
2734 MCOI::TIED_TO) != -1)) {
2735 // If this is zero masking instruction with a tied operand, we need to
2736 // move the first index back to the first input since this must
2737 // be a 3 input instruction and we want the first two non-mask inputs.
2738 // Otherwise this is a 2 input instruction with a preserved input and
2739 // mask, so we need to move the indices to skip one more input.
2740 if (X86II::isKMergeMasked(Desc.TSFlags)) {
2741 ++CommutableOpIdx1;
2742 ++CommutableOpIdx2;
2743 } else {
2744 --CommutableOpIdx1;
2745 }
2746 }
2747
2748 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2749 CommutableOpIdx1, CommutableOpIdx2))
2750 return false;
2751
2752 if (!MI.getOperand(SrcOpIdx1).isReg() ||
2753 !MI.getOperand(SrcOpIdx2).isReg())
2754 // No idea.
2755 return false;
2756 return true;
2757 }
2758
2759 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2760 }
2761 return false;
2762}
2763
2764static bool isConvertibleLEA(MachineInstr *MI) {
2765 unsigned Opcode = MI->getOpcode();
2766 if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
2767 Opcode != X86::LEA64_32r)
2768 return false;
2769
2770 const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
2771 const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
2772 const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
2773
2774 if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
2775 Scale.getImm() > 1)
2776 return false;
2777
2778 return true;
2779}
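
A standalone sketch of the convertibility test above: an LEA can later be turned into an ADD only when it computes base + index with scale 1, zero displacement, and no segment. The struct and helper below are illustrative, not LLVM code.

#include <cstdint>
#include <cstdio>

struct LeaAddr { unsigned SegReg; int64_t Disp; int Scale; };

// Mirror of the test above: no segment, zero displacement, scale of at most 1.
static bool isConvertibleToAdd(const LeaAddr &A) {
  return A.SegReg == 0 && A.Disp == 0 && A.Scale <= 1;
}

int main() {
  LeaAddr Plain{0, 0, 1};   // lea (%r1,%r2), %r3   -> can become an add
  LeaAddr Scaled{0, 0, 4};  // lea (%r1,%r2,4), %r3 -> cannot
  std::printf("%d %d\n", isConvertibleToAdd(Plain), isConvertibleToAdd(Scaled)); // 1 0
}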
2780
2781bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
2782 // Currently we're interested only in the following sequence:
2783 // r3 = lea r1, r2
2784 // r5 = add r3, r4
2785 // Both r3 and r4 are killed in the add; we hope the add instruction has the
2786 // operand order
2787 // r5 = add r4, r3
2788 // So later in X86FixupLEAs the lea instruction can be rewritten as add.
2789 unsigned Opcode = MI.getOpcode();
2790 if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
2791 return false;
2792
2793 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2794 Register Reg1 = MI.getOperand(1).getReg();
2795 Register Reg2 = MI.getOperand(2).getReg();
2796
2797 // Check if Reg1 comes from LEA in the same MBB.
2798 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) {
2799 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
2800 Commute = true;
2801 return true;
2802 }
2803 }
2804
2805 // Check if Reg2 comes from LEA in the same MBB.
2806 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) {
2807 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
2808 Commute = false;
2809 return true;
2810 }
2811 }
2812
2813 return false;
2814}
2815
2816X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
2817 switch (MI.getOpcode()) {
2818 default: return X86::COND_INVALID;
2819 case X86::JCC_1:
2820 return static_cast<X86::CondCode>(
2821 MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
2822 }
2823}
2824
2825/// Return condition code of a SETCC opcode.
2826X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
2827 switch (MI.getOpcode()) {
2828 default: return X86::COND_INVALID;
2829 case X86::SETCCr: case X86::SETCCm:
2830 return static_cast<X86::CondCode>(
2831 MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
2832 }
2833}
2834
2835/// Return condition code of a CMov opcode.
2836X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
2837 switch (MI.getOpcode()) {
2838 default: return X86::COND_INVALID;
2839 case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr:
2840 case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm:
2841 return static_cast<X86::CondCode>(
2842 MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
2843 }
2844}
2845
2846/// Return the inverse of the specified condition,
2847/// e.g. turning COND_E to COND_NE.
2848X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
2849 switch (CC) {
2850 default: llvm_unreachable("Illegal condition code!");
2851 case X86::COND_E: return X86::COND_NE;
2852 case X86::COND_NE: return X86::COND_E;
2853 case X86::COND_L: return X86::COND_GE;
2854 case X86::COND_LE: return X86::COND_G;
2855 case X86::COND_G: return X86::COND_LE;
2856 case X86::COND_GE: return X86::COND_L;
2857 case X86::COND_B: return X86::COND_AE;
2858 case X86::COND_BE: return X86::COND_A;
2859 case X86::COND_A: return X86::COND_BE;
2860 case X86::COND_AE: return X86::COND_B;
2861 case X86::COND_S: return X86::COND_NS;
2862 case X86::COND_NS: return X86::COND_S;
2863 case X86::COND_P: return X86::COND_NP;
2864 case X86::COND_NP: return X86::COND_P;
2865 case X86::COND_O: return X86::COND_NO;
2866 case X86::COND_NO: return X86::COND_O;
2867 case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
2868 case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
2869 }
2870}
2871
2872/// Assuming the flags are set by MI(a,b), return the condition code if we
2873/// modify the instructions such that flags are set by MI(b,a).
2874static X86::CondCode getSwappedCondition(X86::CondCode CC) {
2875 switch (CC) {
2876 default: return X86::COND_INVALID;
2877 case X86::COND_E: return X86::COND_E;
2878 case X86::COND_NE: return X86::COND_NE;
2879 case X86::COND_L: return X86::COND_G;
2880 case X86::COND_LE: return X86::COND_GE;
2881 case X86::COND_G: return X86::COND_L;
2882 case X86::COND_GE: return X86::COND_LE;
2883 case X86::COND_B: return X86::COND_A;
2884 case X86::COND_BE: return X86::COND_AE;
2885 case X86::COND_A: return X86::COND_B;
2886 case X86::COND_AE: return X86::COND_BE;
2887 }
2888}
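A quick sanity check on the table above: swapping the operands of the flag-setting instruction mirrors the ordering predicates and leaves the equality predicates alone. The snippet is plain C++ over scalar values (no LLVM types), so it only illustrates the predicate algebra:

#include <cassert>

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b) {
      assert((a < b) == (b > a));    // COND_L  <-> COND_G
      assert((a <= b) == (b >= a));  // COND_LE <-> COND_GE
      assert((a == b) == (b == a));  // COND_E and COND_NE are symmetric
      unsigned ua = static_cast<unsigned>(a), ub = static_cast<unsigned>(b);
      assert((ua < ub) == (ub > ua));   // COND_B  <-> COND_A
      assert((ua <= ub) == (ub >= ua)); // COND_BE <-> COND_AE
    }
  return 0;
}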
2889
2890std::pair<X86::CondCode, bool>
2891X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
2892 X86::CondCode CC = X86::COND_INVALID;
2893 bool NeedSwap = false;
2894 switch (Predicate) {
2895 default: break;
2896 // Floating-point Predicates
2897 case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
2898 case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH;
2899 case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
2900 case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH;
2901 case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
2902 case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH;
2903 case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
2904 case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH;
2905 case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
2906 case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
2907 case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
2908 case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
2909 case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH;
2910 case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
2911
2912 // Integer Predicates
2913 case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
2914 case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
2915 case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
2916 case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
2917 case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
2918 case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
2919 case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
2920 case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
2921 case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
2922 case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
2923 }
2924
2925 return std::make_pair(CC, NeedSwap);
2926}
2927
2928/// Return a cmov opcode for the given register size in bytes, and operand type.
2929unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) {
2930 switch(RegBytes) {
2931 default: llvm_unreachable("Illegal register size!");
2932 case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr;
2933 case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr;
2934 case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr;
2935 }
2936}
2937
2938/// Get the VPCMP immediate for the given condition.
2939unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) {
2940 switch (CC) {
2941 default: llvm_unreachable("Unexpected SETCC condition");
2942 case ISD::SETNE: return 4;
2943 case ISD::SETEQ: return 0;
2944 case ISD::SETULT:
2945 case ISD::SETLT: return 1;
2946 case ISD::SETUGT:
2947 case ISD::SETGT: return 6;
2948 case ISD::SETUGE:
2949 case ISD::SETGE: return 5;
2950 case ISD::SETULE:
2951 case ISD::SETLE: return 2;
2952 }
2953}
2954
2955/// Get the VPCMP immediate if the operands are swapped.
2956unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
2957 switch (Imm) {
2958 default: llvm_unreachable("Unreachable!");
2959 case 0x01: Imm = 0x06; break; // LT -> NLE
2960 case 0x02: Imm = 0x05; break; // LE -> NLT
2961 case 0x05: Imm = 0x02; break; // NLT -> LE
2962 case 0x06: Imm = 0x01; break; // NLE -> LT
2963 case 0x00: // EQ
2964 case 0x03: // FALSE
2965 case 0x04: // NE
2966 case 0x07: // TRUE
2967 break;
2968 }
2969
2970 return Imm;
2971}
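The swap table above can be cross-checked against scalar semantics, assuming the AVX-512 VPCMP predicate encoding that the comments use (0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT, 6=NLE, 7=TRUE). The helper names below are hypothetical; the real instruction compares vector lanes, but the predicate algebra is the same:

#include <cassert>

static bool vpcmp(unsigned Imm, int a, int b) {
  switch (Imm) {
  case 0: return a == b;
  case 1: return a < b;
  case 2: return a <= b;
  case 3: return false;
  case 4: return a != b;
  case 5: return !(a < b);
  case 6: return !(a <= b);
  default: return true; // 7 = TRUE
  }
}

static unsigned swappedVPCMPImm(unsigned Imm) { // mirrors the mapping above
  static const unsigned Map[8] = {0, 6, 5, 3, 4, 2, 1, 7};
  return Map[Imm];
}

int main() {
  for (unsigned Imm = 0; Imm < 8; ++Imm)
    for (int a = -2; a <= 2; ++a)
      for (int b = -2; b <= 2; ++b)
        assert(vpcmp(Imm, a, b) == vpcmp(swappedVPCMPImm(Imm), b, a));
  return 0;
}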
2972
2973/// Get the VPCOM immediate if the operands are swapped.
2974unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
2975 switch (Imm) {
2976 default: llvm_unreachable("Unreachable!");
2977 case 0x00: Imm = 0x02; break; // LT -> GT
2978 case 0x01: Imm = 0x03; break; // LE -> GE
2979 case 0x02: Imm = 0x00; break; // GT -> LT
2980 case 0x03: Imm = 0x01; break; // GE -> LE
2981 case 0x04: // EQ
2982 case 0x05: // NE
2983 case 0x06: // FALSE
2984 case 0x07: // TRUE
2985 break;
2986 }
2987
2988 return Imm;
2989}
2990
2991/// Get the VCMP immediate if the operands are swapped.
2992unsigned X86::getSwappedVCMPImm(unsigned Imm) {
2993 // Only need the lower 2 bits to distinguish.
2994 switch (Imm & 0x3) {
2995 default: llvm_unreachable("Unreachable!");
2996 case 0x00: case 0x03:
2997 // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
2998 break;
2999 case 0x01: case 0x02:
3000 // Need to toggle bits 3:0. Bit 4 stays the same.
3001 Imm ^= 0xf;
3002 break;
3003 }
3004
3005 return Imm;
3006}
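Worked examples of the bit trick above, assuming the standard AVX VCMP predicate encoding (0x00=EQ_OQ, 0x01=LT_OS, 0x02=LE_OS, 0x05=NLT_US, 0x06=NLE_US, 0x0D=GE_OS, 0x0E=GT_OS, 0x11=LT_OQ, 0x1E=GT_OQ). The helper mirrors the switch; it is an illustrative sketch, not LLVM API:

#include <cassert>

static unsigned swappedVCMPImm(unsigned Imm) {
  unsigned Low2 = Imm & 0x3;
  if (Low2 == 0x1 || Low2 == 0x2) // LT/LE/NLT/NLE/GE/GT/NGE/NGT families
    Imm ^= 0xf;                   // toggle bits 3:0, keep bit 4
  return Imm;                     // EQ/NE/ORD/UNORD/TRUE/FALSE are symmetric
}

int main() {
  assert(swappedVCMPImm(0x01) == 0x0E); // LT_OS  -> GT_OS
  assert(swappedVCMPImm(0x02) == 0x0D); // LE_OS  -> GE_OS
  assert(swappedVCMPImm(0x05) == 0x0A); // NLT_US -> NGT_US
  assert(swappedVCMPImm(0x06) == 0x09); // NLE_US -> NGE_US
  assert(swappedVCMPImm(0x11) == 0x1E); // LT_OQ  -> GT_OQ (bit 4 preserved)
  assert(swappedVCMPImm(0x00) == 0x00); // EQ_OQ unchanged
  return 0;
}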
3007
3008 /// Return true if Reg is an X87 register.
3009static bool isX87Reg(unsigned Reg) {
3010 return (Reg == X86::FPCW || Reg == X86::FPSW ||
3011 (Reg >= X86::ST0 && Reg <= X86::ST7));
3012}
3013
3014 /// Check if the instruction is an X87 instruction.
3015bool X86::isX87Instruction(MachineInstr &MI) {
3016 for (const MachineOperand &MO : MI.operands()) {
3017 if (!MO.isReg())
3018 continue;
3019 if (isX87Reg(MO.getReg()))
3020 return true;
3021 }
3022 return false;
3023}
3024
3025bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
3026 switch (MI.getOpcode()) {
3027 case X86::TCRETURNdi:
3028 case X86::TCRETURNri:
3029 case X86::TCRETURNmi:
3030 case X86::TCRETURNdi64:
3031 case X86::TCRETURNri64:
3032 case X86::TCRETURNmi64:
3033 return true;
3034 default:
3035 return false;
3036 }
3037}
3038
3039bool X86InstrInfo::canMakeTailCallConditional(
3040 SmallVectorImpl<MachineOperand> &BranchCond,
3041 const MachineInstr &TailCall) const {
3042 if (TailCall.getOpcode() != X86::TCRETURNdi &&
3043 TailCall.getOpcode() != X86::TCRETURNdi64) {
3044 // Only direct calls can be done with a conditional branch.
3045 return false;
3046 }
3047
3048 const MachineFunction *MF = TailCall.getParent()->getParent();
3049 if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
3050 // Conditional tail calls confuse the Win64 unwinder.
3051 return false;
3052 }
3053
3054 assert(BranchCond.size() == 1);
3055 if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
3056 // Can't make a conditional tail call with this condition.
3057 return false;
3058 }
3059
3060 const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
3061 if (X86FI->getTCReturnAddrDelta() != 0 ||
3062 TailCall.getOperand(1).getImm() != 0) {
3063 // A conditional tail call cannot do any stack adjustment.
3064 return false;
3065 }
3066
3067 return true;
3068}
3069
3070void X86InstrInfo::replaceBranchWithTailCall(
3071 MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
3072 const MachineInstr &TailCall) const {
3073 assert(canMakeTailCallConditional(BranchCond, TailCall));
3074
3075 MachineBasicBlock::iterator I = MBB.end();
3076 while (I != MBB.begin()) {
3077 --I;
3078 if (I->isDebugInstr())
3079 continue;
3080 if (!I->isBranch())
3081 assert(0 && "Can't find the branch to replace!");
3082
3083 X86::CondCode CC = X86::getCondFromBranch(*I);
3084 assert(BranchCond.size() == 1);
3085 if (CC != BranchCond[0].getImm())
3086 continue;
3087
3088 break;
3089 }
3090
3091 unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
3092 : X86::TCRETURNdi64cc;
3093
3094 auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
3095 MIB->addOperand(TailCall.getOperand(0)); // Destination.
3096 MIB.addImm(0); // Stack offset (not used).
3097 MIB->addOperand(BranchCond[0]); // Condition.
3098 MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
3099
3100 // Add implicit uses and defs of all live regs potentially clobbered by the
3101 // call. This way they still appear live across the call.
3102 LivePhysRegs LiveRegs(getRegisterInfo());
3103 LiveRegs.addLiveOuts(MBB);
3104 SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
3105 LiveRegs.stepForward(*MIB, Clobbers);
3106 for (const auto &C : Clobbers) {
3107 MIB.addReg(C.first, RegState::Implicit);
3108 MIB.addReg(C.first, RegState::Implicit | RegState::Define);
3109 }
3110
3111 I->eraseFromParent();
3112}
3113
3114 // Given an MBB and its TBB, find the FBB which was a fallthrough MBB (it may
3115// not be a fallthrough MBB now due to layout changes). Return nullptr if the
3116// fallthrough MBB cannot be identified.
3117static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
3118 MachineBasicBlock *TBB) {
3119 // Look for non-EHPad successors other than TBB. If we find exactly one, it
3120 // is the fallthrough MBB. If we find zero, then TBB is both the target MBB
3121 // and fallthrough MBB. If we find more than one, we cannot identify the
3122 // fallthrough MBB and should return nullptr.
3123 MachineBasicBlock *FallthroughBB = nullptr;
3124 for (MachineBasicBlock *Succ : MBB->successors()) {
3125 if (Succ->isEHPad() || (Succ == TBB && FallthroughBB))
3126 continue;
3127 // Return a nullptr if we found more than one fallthrough successor.
3128 if (FallthroughBB && FallthroughBB != TBB)
3129 return nullptr;
3130 FallthroughBB = Succ;
3131 }
3132 return FallthroughBB;
3133}
3134
3135bool X86InstrInfo::AnalyzeBranchImpl(
3136 MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
3137 SmallVectorImpl<MachineOperand> &Cond,
3138 SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
3139
3140 // Start from the bottom of the block and work up, examining the
3141 // terminator instructions.
3142 MachineBasicBlock::iterator I = MBB.end();
3143 MachineBasicBlock::iterator UnCondBrIter = MBB.end();
3144 while (I != MBB.begin()) {
3145 --I;
3146 if (I->isDebugInstr())
3147 continue;
3148
3149 // Working from the bottom, when we see a non-terminator instruction, we're
3150 // done.
3151 if (!isUnpredicatedTerminator(*I))
3152 break;
3153
3154 // A terminator that isn't a branch can't easily be handled by this
3155 // analysis.
3156 if (!I->isBranch())
3157 return true;
3158
3159 // Handle unconditional branches.
3160 if (I->getOpcode() == X86::JMP_1) {
3161 UnCondBrIter = I;
3162
3163 if (!AllowModify) {
3164 TBB = I->getOperand(0).getMBB();
3165 continue;
3166 }
3167
3168 // If the block has any instructions after a JMP, delete them.
3169 while (std::next(I) != MBB.end())
3170 std::next(I)->eraseFromParent();
3171
3172 Cond.clear();
3173 FBB = nullptr;
3174
3175 // Delete the JMP if it's equivalent to a fall-through.
3176 if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
3177 TBB = nullptr;
3178 I->eraseFromParent();
3179 I = MBB.end();
3180 UnCondBrIter = MBB.end();
3181 continue;
3182 }
3183
3184 // TBB is used to indicate the unconditional destination.
3185 TBB = I->getOperand(0).getMBB();
3186 continue;
3187 }
3188
3189 // Handle conditional branches.
3190 X86::CondCode BranchCode = X86::getCondFromBranch(*I);
3191 if (BranchCode == X86::COND_INVALID)
3192 return true; // Can't handle indirect branch.
3193
3194 // In practice we should never have an undef EFLAGS operand; if we do,
3195 // abort here as we are not prepared to preserve the flag.
3196 if (I->findRegisterUseOperand(X86::EFLAGS)->isUndef())
3197 return true;
3198
3199 // Working from the bottom, handle the first conditional branch.
3200 if (Cond.empty()) {
3201 MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
3202 if (AllowModify && UnCondBrIter != MBB.end() &&
3203 MBB.isLayoutSuccessor(TargetBB)) {
3204 // If we can modify the code and it ends in something like:
3205 //
3206 // jCC L1
3207 // jmp L2
3208 // L1:
3209 // ...
3210 // L2:
3211 //
3212 // Then we can change this to:
3213 //
3214 // jnCC L2
3215 // L1:
3216 // ...
3217 // L2:
3218 //
3219 // Which is a bit more efficient.
3220 // We conditionally jump to the fall-through block.
3221 BranchCode = GetOppositeBranchCondition(BranchCode);
3222 MachineBasicBlock::iterator OldInst = I;
3223
3224 BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JCC_1))
3225 .addMBB(UnCondBrIter->getOperand(0).getMBB())
3226 .addImm(BranchCode);
3227 BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
3228 .addMBB(TargetBB);
3229
3230 OldInst->eraseFromParent();
3231 UnCondBrIter->eraseFromParent();
3232
3233 // Restart the analysis.
3234 UnCondBrIter = MBB.end();
3235 I = MBB.end();
3236 continue;
3237 }
3238
3239 FBB = TBB;
3240 TBB = I->getOperand(0).getMBB();
3241 Cond.push_back(MachineOperand::CreateImm(BranchCode));
3242 CondBranches.push_back(&*I);
3243 continue;
3244 }
3245
3246 // Handle subsequent conditional branches. Only handle the case where all
3247 // conditional branches branch to the same destination and their condition
3248 // opcodes fit one of the special multi-branch idioms.
3249 assert(Cond.size() == 1);
3250 assert(TBB);
3251
3252 // If the conditions are the same, we can leave them alone.
3253 X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
3254 auto NewTBB = I->getOperand(0).getMBB();
3255 if (OldBranchCode == BranchCode && TBB == NewTBB)
3256 continue;
3257
3258 // If they differ, see if they fit one of the known patterns. Theoretically,
3259 // we could handle more patterns here, but we shouldn't expect to see them
3260 // if instruction selection has done a reasonable job.
3261 if (TBB == NewTBB &&
3262 ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
3263 (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
3264 BranchCode = X86::COND_NE_OR_P;
3265 } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
3266 (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
3267 if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
3268 return true;
3269
3270 // X86::COND_E_AND_NP usually has two different branch destinations.
3271 //
3272 // JP B1
3273 // JE B2
3274 // JMP B1
3275 // B1:
3276 // B2:
3277 //
3278 // Here this condition branches to B2 only if NP && E. It has another
3279 // equivalent form:
3280 //
3281 // JNE B1
3282 // JNP B2
3283 // JMP B1
3284 // B1:
3285 // B2:
3286 //
3287 // Similarly it branches to B2 only if E && NP. That is why this condition
3288 // is named with COND_E_AND_NP.
3289 BranchCode = X86::COND_E_AND_NP;
3290 } else
3291 return true;
3292
3293 // Update the MachineOperand.
3294 Cond[0].setImm(BranchCode);
3295 CondBranches.push_back(&*I);
3296 }
3297
3298 return false;
3299}
3300
3301bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
3302 MachineBasicBlock *&TBB,
3303 MachineBasicBlock *&FBB,
3304 SmallVectorImpl<MachineOperand> &Cond,
3305 bool AllowModify) const {
3306 SmallVector<MachineInstr *, 4> CondBranches;
3307 return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
3308}
3309
3310bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
3311 MachineBranchPredicate &MBP,
3312 bool AllowModify) const {
3313 using namespace std::placeholders;
3314
3315 SmallVector<MachineOperand, 4> Cond;
3316 SmallVector<MachineInstr *, 4> CondBranches;
3317 if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
3318 AllowModify))
3319 return true;
3320
3321 if (Cond.size() != 1)
3322 return true;
3323
3324 assert(MBP.TrueDest && "expected!");
3325
3326 if (!MBP.FalseDest)
3327 MBP.FalseDest = MBB.getNextNode();
3328
3329 const TargetRegisterInfo *TRI = &getRegisterInfo();
3330
3331 MachineInstr *ConditionDef = nullptr;
3332 bool SingleUseCondition = true;
3333
3334 for (MachineInstr &MI : llvm::drop_begin(llvm::reverse(MBB))) {
3335 if (MI.modifiesRegister(X86::EFLAGS, TRI)) {
3336 ConditionDef = &MI;
3337 break;
3338 }
3339
3340 if (MI.readsRegister(X86::EFLAGS, TRI))
3341 SingleUseCondition = false;
3342 }
3343
3344 if (!ConditionDef)
3345 return true;
3346
3347 if (SingleUseCondition) {
3348 for (auto *Succ : MBB.successors())
3349 if (Succ->isLiveIn(X86::EFLAGS))
3350 SingleUseCondition = false;
3351 }
3352
3353 MBP.ConditionDef = ConditionDef;
3354 MBP.SingleUseCondition = SingleUseCondition;
3355
3356 // Currently we only recognize the simple pattern:
3357 //
3358 // test %reg, %reg
3359 // je %label
3360 //
3361 const unsigned TestOpcode =
3362 Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
3363
3364 if (ConditionDef->getOpcode() == TestOpcode &&
3365 ConditionDef->getNumOperands() == 3 &&
3366 ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
3367 (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
3368 MBP.LHS = ConditionDef->getOperand(0);
3369 MBP.RHS = MachineOperand::CreateImm(0);
3370 MBP.Predicate = Cond[0].getImm() == X86::COND_NE
3371 ? MachineBranchPredicate::PRED_NE
3372 : MachineBranchPredicate::PRED_EQ;
3373 return false;
3374 }
3375
3376 return true;
3377}
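Why the recognized pattern maps to PRED_EQ/PRED_NE against an immediate 0: "test %reg, %reg" computes reg & reg and sets ZF exactly when the register is zero, so JE/JNE decide reg == 0 / reg != 0. A scalar model of that, as a self-contained check:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t Values[] = {-5, -1, 0, 1, 42};
  for (int64_t R : Values) {
    bool ZF = (R & R) == 0;   // zero flag produced by "test r, r"
    assert(ZF == (R == 0));   // JE taken  <=> r == 0 (PRED_EQ)
    assert(!ZF == (R != 0));  // JNE taken <=> r != 0 (PRED_NE)
  }
  return 0;
}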
3378
3379unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
3380 int *BytesRemoved) const {
3381 assert(!BytesRemoved && "code size not handled");
3382
3383 MachineBasicBlock::iterator I = MBB.end();
3384 unsigned Count = 0;
3385
3386 while (I != MBB.begin()) {
3387 --I;
3388 if (I->isDebugInstr())
3389 continue;
3390 if (I->getOpcode() != X86::JMP_1 &&
3391 X86::getCondFromBranch(*I) == X86::COND_INVALID)
3392 break;
3393 // Remove the branch.
3394 I->eraseFromParent();
3395 I = MBB.end();
3396 ++Count;
3397 }
3398
3399 return Count;
3400}
3401
3402unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
3403 MachineBasicBlock *TBB,
3404 MachineBasicBlock *FBB,
3405 ArrayRef<MachineOperand> Cond,
3406 const DebugLoc &DL,
3407 int *BytesAdded) const {
3408 // Shouldn't be a fall through.
3409 assert(TBB && "insertBranch must not be told to insert a fallthrough");
3410 assert((Cond.size() == 1 || Cond.size() == 0) &&
3411 "X86 branch conditions have one component!");
3412 assert(!BytesAdded && "code size not handled");
3413
3414 if (Cond.empty()) {
3415 // Unconditional branch?
3416 assert(!FBB && "Unconditional branch with multiple successors!");
3417 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
3418 return 1;
3419 }
3420
3421 // If FBB is null, it is implied to be a fall-through block.
3422 bool FallThru = FBB == nullptr;
3423
3424 // Conditional branch.
3425 unsigned Count = 0;
3426 X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
3427 switch (CC) {
3428 case X86::COND_NE_OR_P:
3429 // Synthesize NE_OR_P with two branches.
3430 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE);
3431 ++Count;
3432 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P);
3433 ++Count;
3434 break;
3435 case X86::COND_E_AND_NP:
3436 // Use the next block of MBB as FBB if it is null.
3437 if (FBB == nullptr) {
3438 FBB = getFallThroughMBB(&MBB, TBB);
3439 assert(FBB && "MBB cannot be the last block in function when the false "
3440 "body is a fall-through.");
3441 }
3442 // Synthesize COND_E_AND_NP with two branches.
3443 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE);
3444 ++Count;
3445 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP);
3446 ++Count;
3447 break;
3448 default: {
3449 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC);
3450 ++Count;
3451 }
3452 }
3453 if (!FallThru) {
3454 // Two-way Conditional branch. Insert the second branch.
3455 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
3456 ++Count;
3457 }
3458 return Count;
3459}
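The two composite conditions are lowered as pairs of JCCs. The small simulation below (plain C++, hypothetical names) checks that the sequences emitted above implement the intended predicates for every combination of the E and P flags: COND_NE_OR_P as "jne TBB; jp TBB", and COND_E_AND_NP as "jne FBB; jnp TBB".

#include <cassert>
#include <string>

static std::string twoBranch(bool c1, const std::string &d1,
                             bool c2, const std::string &d2,
                             const std::string &fallthrough) {
  if (c1) return d1;
  if (c2) return d2;
  return fallthrough;
}

int main() {
  for (int E = 0; E <= 1; ++E)
    for (int P = 0; P <= 1; ++P) {
      bool NE = !E, NP = !P;
      // COND_NE_OR_P: jne TBB; jp TBB; otherwise fall through to FBB.
      assert(twoBranch(NE, "TBB", P, "TBB", "FBB") ==
             ((NE || P) ? "TBB" : "FBB"));
      // COND_E_AND_NP: jne FBB; jnp TBB; otherwise FBB.
      assert(twoBranch(NE, "FBB", NP, "TBB", "FBB") ==
             ((E && NP) ? "TBB" : "FBB"));
    }
  return 0;
}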
3460
3461bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
3462 ArrayRef<MachineOperand> Cond,
3463 Register DstReg, Register TrueReg,
3464 Register FalseReg, int &CondCycles,
3465 int &TrueCycles, int &FalseCycles) const {
3466 // Not all subtargets have cmov instructions.
3467 if (!Subtarget.hasCMov())
3468 return false;
3469 if (Cond.size() != 1)
3470 return false;
3471 // We cannot do the composite conditions, at least not in SSA form.
3472 if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND)
3473 return false;
3474
3475 // Check register classes.
3476 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3477 const TargetRegisterClass *RC =
3478 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
3479 if (!RC)
3480 return false;
3481
3482 // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
3483 if (X86::GR16RegClass.hasSubClassEq(RC) ||
3484 X86::GR32RegClass.hasSubClassEq(RC) ||
3485 X86::GR64RegClass.hasSubClassEq(RC)) {
3486 // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
3487 // Bridge. Probably Ivy Bridge as well.
3488 CondCycles = 2;
3489 TrueCycles = 2;
3490 FalseCycles = 2;
3491 return true;
3492 }
3493
3494 // Can't do vectors.
3495 return false;
3496}
3497
3498void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
3499 MachineBasicBlock::iterator I,
3500 const DebugLoc &DL, Register DstReg,
3501 ArrayRef<MachineOperand> Cond, Register TrueReg,
3502 Register FalseReg) const {
3503 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3504 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3505 const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
3506 assert(Cond.size() == 1 && "Invalid Cond array");
3507 unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
3508 false /*HasMemoryOperand*/);
3509 BuildMI(MBB, I, DL, get(Opc), DstReg)
3510 .addReg(FalseReg)
3511 .addReg(TrueReg)
3512 .addImm(Cond[0].getImm());
3513}
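The operand order above matters because the machine CMOV ties its first source to the destination and only copies the second source when the condition holds, so adding FalseReg before TrueReg yields Dst = Cond ? TrueReg : FalseReg. A scalar model of that semantics (hypothetical helper, for illustration only):

static int cmovModel(bool Cond, int FalseVal, int TrueVal) {
  int Dst = FalseVal;   // tied operand: the destination starts as the false value
  if (Cond)
    Dst = TrueVal;      // the conditional move overwrites it when Cond is set
  return Dst;
}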
3514
3515/// Test if the given register is a physical h register.
3516static bool isHReg(unsigned Reg) {
3517 return X86::GR8_ABCD_HRegClass.contains(Reg);
3518}
3519
3520// Try and copy between VR128/VR64 and GR64 registers.
3521static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
3522 const X86Subtarget &Subtarget) {
3523 bool HasAVX = Subtarget.hasAVX();
3524 bool HasAVX512 = Subtarget.hasAVX512();
3525
3526 // SrcReg(MaskReg) -> DestReg(GR64)
3527 // SrcReg(MaskReg) -> DestReg(GR32)
3528
3529 // All KMASK RegClasses hold the same k registers, so any one can be tested against.
3530 if (X86::VK16RegClass.contains(SrcReg)) {
3531 if (X86::GR64RegClass.contains(DestReg)) {
3532 assert(Subtarget.hasBWI());
3533 return X86::KMOVQrk;
3534 }
3535 if (X86::GR32RegClass.contains(DestReg))
3536 return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
3537 }
3538
3539 // SrcReg(GR64) -> DestReg(MaskReg)
3540 // SrcReg(GR32) -> DestReg(MaskReg)
3541
3542 // All KMASK RegClasses hold the same k registers, so any one can be tested against.
3543 if (X86::VK16RegClass.contains(DestReg)) {
3544 if (X86::GR64RegClass.contains(SrcReg)) {
3545 assert(Subtarget.hasBWI());
3546 return X86::KMOVQkr;
3547 }
3548 if (X86::GR32RegClass.contains(SrcReg))
3549 return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
3550 }
3551
3552
3553 // SrcReg(VR128) -> DestReg(GR64)
3554 // SrcReg(VR64) -> DestReg(GR64)
3555 // SrcReg(GR64) -> DestReg(VR128)
3556 // SrcReg(GR64) -> DestReg(VR64)
3557
3558 if (X86::GR64RegClass.contains(DestReg)) {
3559 if (X86::VR128XRegClass.contains(SrcReg))
3560 // Copy from a VR128 register to a GR64 register.
3561 return HasAVX512 ? X86::VMOVPQIto64Zrr :
3562 HasAVX ? X86::VMOVPQIto64rr :
3563 X86::MOVPQIto64rr;
3564 if (X86::VR64RegClass.contains(SrcReg))
3565 // Copy from a VR64 register to a GR64 register.
3566 return X86::MMX_MOVD64from64rr;
3567 } else if (X86::GR64RegClass.contains(SrcReg)) {
3568 // Copy from a GR64 register to a VR128 register.
3569 if (X86::VR128XRegClass.contains(DestReg))
3570 return HasAVX512 ? X86::VMOV64toPQIZrr :
3571 HasAVX ? X86::VMOV64toPQIrr :
3572 X86::MOV64toPQIrr;
3573 // Copy from a GR64 register to a VR64 register.
3574 if (X86::VR64RegClass.contains(DestReg))
3575 return X86::MMX_MOVD64to64rr;
3576 }
3577
3578 // SrcReg(VR128) -> DestReg(GR32)
3579 // SrcReg(GR32) -> DestReg(VR128)
3580
3581 if (X86::GR32RegClass.contains(DestReg) &&
3582 X86::VR128XRegClass.contains(SrcReg))
3583 // Copy from a VR128 register to a GR32 register.
3584 return HasAVX512 ? X86::VMOVPDI2DIZrr :
3585 HasAVX ? X86::VMOVPDI2DIrr :
3586 X86::MOVPDI2DIrr;
3587
3588 if (X86::VR128XRegClass.contains(DestReg) &&
3589 X86::GR32RegClass.contains(SrcReg))
3590 // Copy from a GR32 register to a VR128 register.
3591 return HasAVX512 ? X86::VMOVDI2PDIZrr :
3592 HasAVX ? X86::VMOVDI2PDIrr :
3593 X86::MOVDI2PDIrr;
3594 return 0;
3595}
3596
3597void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
3598 MachineBasicBlock::iterator MI,
3599 const DebugLoc &DL, MCRegister DestReg,
3600 MCRegister SrcReg, bool KillSrc) const {
3601 // First deal with the normal symmetric copies.
3602 bool HasAVX = Subtarget.hasAVX();
3603 bool HasVLX = Subtarget.hasVLX();
3604 unsigned Opc = 0;
3605 if (X86::GR64RegClass.contains(DestReg, SrcReg))
3606 Opc = X86::MOV64rr;
3607 else if (X86::GR32RegClass.contains(DestReg, SrcReg))
3608 Opc = X86::MOV32rr;
3609 else if (X86::GR16RegClass.contains(DestReg, SrcReg))
3610 Opc = X86::MOV16rr;
3611 else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
3612 // Copying to or from a physical H register on x86-64 requires a NOREX
3613 // move. Otherwise use a normal move.
3614 if ((isHReg(DestReg) || isHReg(SrcReg)) &&
3615 Subtarget.is64Bit()) {
3616 Opc = X86::MOV8rr_NOREX;
3617 // Both operands must be encodable without an REX prefix.
3618 assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
3619 "8-bit H register can not be copied outside GR8_NOREX");
3620 } else
3621 Opc = X86::MOV8rr;
3622 }
3623 else if (X86::VR64RegClass.contains(DestReg, SrcReg))
3624 Opc = X86::MMX_MOVQ64rr;
3625 else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
3626 if (HasVLX)
3627 Opc = X86::VMOVAPSZ128rr;
3628 else if (X86::VR128RegClass.contains(DestReg, SrcReg))
3629 Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
3630 else {
3631 // If this is an extended register and we don't have VLX we need to use a
3632 // 512-bit move.
3633 Opc = X86::VMOVAPSZrr;
3634 const TargetRegisterInfo *TRI = &getRegisterInfo();
3635 DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm,
3636 &X86::VR512RegClass);
3637 SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm,
3638 &X86::VR512RegClass);
3639 }
3640 } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
3641 if (HasVLX)
3642 Opc = X86::VMOVAPSZ256rr;
3643 else if (X86::VR256RegClass.contains(DestReg, SrcReg))
3644 Opc = X86::VMOVAPSYrr;
3645 else {
3646 // If this is an extended register and we don't have VLX we need to use a
3647 // 512-bit move.
3648 Opc = X86::VMOVAPSZrr;
3649 const TargetRegisterInfo *TRI = &getRegisterInfo();
3650 DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm,
3651 &X86::VR512RegClass);
3652 SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm,
3653 &X86::VR512RegClass);
3654 }
3655 } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
3656 Opc = X86::VMOVAPSZrr;
3657 // All KMASK RegClasses hold the same k registers, so any one can be tested against.
3658 else if (X86::VK16RegClass.contains(DestReg, SrcReg))
3659 Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
3660 if (!Opc)
3661 Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
3662
3663 if (Opc) {
3664 BuildMI(MBB, MI, DL, get(Opc), DestReg)
3665 .addReg(SrcReg, getKillRegState(KillSrc));
3666 return;
3667 }
3668
3669 if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
3670 // FIXME: We use a fatal error here because historically LLVM has tried
3671 // lower some of these physreg copies and we want to ensure we get
3672 // reasonable bug reports if someone encounters a case no other testing
3673 // found. This path should be removed after the LLVM 7 release.
3674 report_fatal_error("Unable to copy EFLAGS physical register!");
3675 }
3676
3677 LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to "
3678 << RI.getName(DestReg) << '\n');
3679 report_fatal_error("Cannot emit physreg copy instruction");
3680}
3681
3682Optional<DestSourcePair>
3683X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
3684 if (MI.isMoveReg())
3685 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
3686 return None;
3687}
3688
3689static unsigned getLoadStoreRegOpcode(Register Reg,
3690 const TargetRegisterClass *RC,
3691 bool IsStackAligned,
3692 const X86Subtarget &STI, bool load) {
3693 bool HasAVX = STI.hasAVX();
3694 bool HasAVX512 = STI.hasAVX512();
3695 bool HasVLX = STI.hasVLX();
3696
3697 switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
3698 default:
3699 llvm_unreachable("Unknown spill size")::llvm::llvm_unreachable_internal("Unknown spill size", "llvm/lib/Target/X86/X86InstrInfo.cpp"
, 3699)
;
3700 case 1:
3701 assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
3702 if (STI.is64Bit())
3703 // Copying to or from a physical H register on x86-64 requires a NOREX
3704 // move. Otherwise use a normal move.
3705 if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
3706 return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
3707 return load ? X86::MOV8rm : X86::MOV8mr;
3708 case 2:
3709 if (X86::VK16RegClass.hasSubClassEq(RC))
3710 return load ? X86::KMOVWkm : X86::KMOVWmk;
3711 if (X86::FR16XRegClass.hasSubClassEq(RC)) {
3712 assert(STI.hasFP16());
3713 return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
3714 }
3715 assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
3716 return load ? X86::MOV16rm : X86::MOV16mr;
3717 case 4:
3718 if (X86::GR32RegClass.hasSubClassEq(RC))
3719 return load ? X86::MOV32rm : X86::MOV32mr;
3720 if (X86::FR32XRegClass.hasSubClassEq(RC))
3721 return load ?
3722 (HasAVX512 ? X86::VMOVSSZrm_alt :
3723 HasAVX ? X86::VMOVSSrm_alt :
3724 X86::MOVSSrm_alt) :
3725 (HasAVX512 ? X86::VMOVSSZmr :
3726 HasAVX ? X86::VMOVSSmr :
3727 X86::MOVSSmr);
3728 if (X86::RFP32RegClass.hasSubClassEq(RC))
3729 return load ? X86::LD_Fp32m : X86::ST_Fp32m;
3730 if (X86::VK32RegClass.hasSubClassEq(RC)) {
3731 assert(STI.hasBWI() && "KMOVD requires BWI");
3732 return load ? X86::KMOVDkm : X86::KMOVDmk;
3733 }
3734 // All of these mask pair classes have the same spill size, so the same kind
3735 // of kmov instructions can be used with all of them.
3736 if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
3737 X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
3738 X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
3739 X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
3740 X86::VK16PAIRRegClass.hasSubClassEq(RC))
3741 return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
3742 llvm_unreachable("Unknown 4-byte regclass")::llvm::llvm_unreachable_internal("Unknown 4-byte regclass", "llvm/lib/Target/X86/X86InstrInfo.cpp"
, 3742)
;
3743 case 8:
3744 if (X86::GR64RegClass.hasSubClassEq(RC))
3745 return load ? X86::MOV64rm : X86::MOV64mr;
3746 if (X86::FR64XRegClass.hasSubClassEq(RC))
3747 return load ?
3748 (HasAVX512 ? X86::VMOVSDZrm_alt :
3749 HasAVX ? X86::VMOVSDrm_alt :
3750 X86::MOVSDrm_alt) :
3751 (HasAVX512 ? X86::VMOVSDZmr :
3752 HasAVX ? X86::VMOVSDmr :
3753 X86::MOVSDmr);
3754 if (X86::VR64RegClass.hasSubClassEq(RC))
3755 return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
3756 if (X86::RFP64RegClass.hasSubClassEq(RC))
3757 return load ? X86::LD_Fp64m : X86::ST_Fp64m;
3758 if (X86::VK64RegClass.hasSubClassEq(RC)) {
3759 assert(STI.hasBWI() && "KMOVQ requires BWI");
3760 return load ? X86::KMOVQkm : X86::KMOVQmk;
3761 }
3762 llvm_unreachable("Unknown 8-byte regclass")::llvm::llvm_unreachable_internal("Unknown 8-byte regclass", "llvm/lib/Target/X86/X86InstrInfo.cpp"
, 3762)
;
3763 case 10:
3764 assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
3765 return load ? X86::LD_Fp80m : X86::ST_FpP80m;
3766 case 16: {
3767 if (X86::VR128XRegClass.hasSubClassEq(RC)) {
3768 // If stack is realigned we can use aligned stores.
3769 if (IsStackAligned)
3770 return load ?
3771 (HasVLX ? X86::VMOVAPSZ128rm :
3772 HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
3773 HasAVX ? X86::VMOVAPSrm :
3774 X86::MOVAPSrm):
3775 (HasVLX ? X86::VMOVAPSZ128mr :
3776 HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
3777 HasAVX ? X86::VMOVAPSmr :
3778 X86::MOVAPSmr);
3779 else
3780 return load ?
3781 (HasVLX ? X86::VMOVUPSZ128rm :
3782 HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
3783 HasAVX ? X86::VMOVUPSrm :
3784 X86::MOVUPSrm):
3785 (HasVLX ? X86::VMOVUPSZ128mr :
3786 HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
3787 HasAVX ? X86::VMOVUPSmr :
3788 X86::MOVUPSmr);
3789 }
3790 llvm_unreachable("Unknown 16-byte regclass")::llvm::llvm_unreachable_internal("Unknown 16-byte regclass",
"llvm/lib/Target/X86/X86InstrInfo.cpp", 3790)
;
3791 }
3792 case 32:
3793 assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
3794 // If stack is realigned we can use aligned stores.
3795 if (IsStackAligned)
3796 return load ?
3797 (HasVLX ? X86::VMOVAPSZ256rm :
3798 HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX :
3799 X86::VMOVAPSYrm) :
3800 (HasVLX ? X86::VMOVAPSZ256mr :
3801 HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX :
3802 X86::VMOVAPSYmr);
3803 else
3804 return load ?
3805 (HasVLX ? X86::VMOVUPSZ256rm :
3806 HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX :
3807 X86::VMOVUPSYrm) :
3808 (HasVLX ? X86::VMOVUPSZ256mr :
3809 HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX :
3810 X86::VMOVUPSYmr);
3811 case 64:
3812 assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
3813 assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
3814 if (IsStackAligned)
3815 return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
3816 else
3817 return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
3818 }
3819}
3820
3821Optional<ExtAddrMode>
3822X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3823 const TargetRegisterInfo *TRI) const {
3824 const MCInstrDesc &Desc = MemI.getDesc();
3825 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
3826 if (MemRefBegin < 0)
3827 return None;
3828
3829 MemRefBegin += X86II::getOperandBias(Desc);
3830
3831 auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
3832 if (!BaseOp.isReg()) // Can be an MO_FrameIndex
3833 return None;
3834
3835 const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
3836 // Displacement can be symbolic
3837 if (!DispMO.isImm())
3838 return None;
3839
3840 ExtAddrMode AM;
3841 AM.BaseReg = BaseOp.getReg();
3842 AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg();
3843 AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
3844 AM.Displacement = DispMO.getImm();
3845 return AM;
3846}
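A consumer of the returned ExtAddrMode would combine the fields with the usual x86 addressing arithmetic, Base + Scale * Index + Displacement. A small sketch of that evaluation with hypothetical value arguments (the real fields hold register numbers, not values):

#include <cstdint>

static int64_t effectiveAddress(int64_t BaseVal, int64_t IndexVal,
                                unsigned Scale, int64_t Displacement) {
  // x86 memory operand: Base + Scale * Index + Displacement.
  return BaseVal + int64_t(Scale) * IndexVal + Displacement;
}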
3847
3848bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
3849 const Register Reg,
3850 int64_t &ImmVal) const {
3851 if (MI.getOpcode() != X86::MOV32ri && MI.getOpcode() != X86::MOV64ri)
3852 return false;
3853 // Mov Src can be a global address.
3854 if (!MI.getOperand(1).isImm() || MI.getOperand(0).getReg() != Reg)
3855 return false;
3856 ImmVal = MI.getOperand(1).getImm();
3857 return true;
3858}
3859
3860bool X86InstrInfo::preservesZeroValueInReg(
3861 const MachineInstr *MI, const Register NullValueReg,
3862 const TargetRegisterInfo *TRI) const {
3863 if (!MI->modifiesRegister(NullValueReg, TRI))
3864 return true;
3865 switch (MI->getOpcode()) {
3866 // Shifting a null value right or left by an immediate still yields null,
3867 // i.e. rax = shl rax, X.
3868 case X86::SHR64ri:
3869 case X86::SHR32ri:
3870 case X86::SHL64ri:
3871 case X86::SHL32ri:
3872 assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() &&
3873 "expected for shift opcode!");
3874 return MI->getOperand(0).getReg() == NullValueReg &&
3875 MI->getOperand(1).getReg() == NullValueReg;
3876 // Zero extend of a sub-reg of NullValueReg into itself does not change the
3877 // null value.
3878 case X86::MOV32rr:
3879 return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) {
3880 return TRI->isSubRegisterEq(NullValueReg, MO.getReg());
3881 });
3882 default:
3883 return false;
3884 }
3885 llvm_unreachable("Should be handled above!")::llvm::llvm_unreachable_internal("Should be handled above!",
"llvm/lib/Target/X86/X86InstrInfo.cpp", 3885)
;
3886}
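A sanity check of the two accepted cases on plain integers: shifting a zero value by any immediate leaves it zero, and writing the low 32 bits of a zero 64-bit value (as MOV32rr implicitly does) zero-extends back to zero. Self-contained sketch:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Null = 0;
  for (unsigned Sh = 0; Sh < 64; ++Sh)
    assert((Null << Sh) == 0 && (Null >> Sh) == 0); // shifts preserve the null
  assert(uint64_t(uint32_t(Null)) == 0);            // 32-bit write zero-extends
  return 0;
}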
3887
3888bool X86InstrInfo::getMemOperandsWithOffsetWidth(
3889 const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
3890 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
3891 const TargetRegisterInfo *TRI) const {
3892 const MCInstrDesc &Desc = MemOp.getDesc();
3893 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
3894 if (MemRefBegin < 0)
3895 return false;
3896
3897 MemRefBegin += X86II::getOperandBias(Desc);
3898
3899 const MachineOperand *BaseOp =
3900 &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
3901 if (!BaseOp->isReg()) // Can be an MO_FrameIndex
3902 return false;
3903
3904 if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
3905 return false;
3906
3907 if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
3908 X86::NoRegister)
3909 return false;
3910
3911 const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
3912
3913 // Displacement can be symbolic
3914 if (!DispMO.isImm())
3915 return false;
3916
3917 Offset = DispMO.getImm();
3918
3919 if (!BaseOp->isReg())
3920 return false;
3921
3922 OffsetIsScalable = false;
3923 // FIXME: Relying on memoperands() may not be the right thing to do here. Check
3924 // with X86 maintainers, and fix it accordingly. For now, it is OK, since
3925 // there is no use of `Width` for the X86 back-end at the moment.
3926 Width =
3927 !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
3928 BaseOps.push_back(BaseOp);
3929 return true;
3930}
3931
3932static unsigned getStoreRegOpcode(Register SrcReg,
3933 const TargetRegisterClass *RC,
3934 bool IsStackAligned,
3935 const X86Subtarget &STI) {
3936 return getLoadStoreRegOpcode(SrcReg, RC, IsStackAligned, STI, false);
3937}
3938
3939static unsigned getLoadRegOpcode(Register DestReg,
3940 const TargetRegisterClass *RC,
3941 bool IsStackAligned, const X86Subtarget &STI) {
3942 return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
3943}
3944
3945void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
3946 MachineBasicBlock::iterator MI,
3947 Register SrcReg, bool isKill, int FrameIdx,
3948 const TargetRegisterClass *RC,
3949 const TargetRegisterInfo *TRI) const {
3950 const MachineFunction &MF = *MBB.getParent();
3951 const MachineFrameInfo &MFI = MF.getFrameInfo();
3952 assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
3953 "Stack slot too small for store");
3954 if (RC->getID() == X86::TILERegClassID) {
3955 unsigned Opc = X86::TILESTORED;
3956 // tilestored %tmm, (%sp, %idx)
3957 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
3958 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
3959 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
3960 MachineInstr *NewMI =
3961 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
3962 .addReg(SrcReg, getKillRegState(isKill));
3963 MachineOperand &MO = NewMI->getOperand(2);
3964 MO.setReg(VirtReg);
3965 MO.setIsKill(true);
3966 } else {
3967 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
3968 bool isAligned =
3969 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
3970 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
3971 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
3972 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
3973 .addReg(SrcReg, getKillRegState(isKill));
3974 }
3975}
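The aligned-vs-unaligned opcode choice above boils down to one predicate: the required alignment (the spill size clamped to at least 16) is available either because the incoming stack alignment already covers it, or because the slot is not a fixed object and the frame can be realigned. A compact restatement with hypothetical parameter names, not LLVM API:

static bool canUseAlignedSpill(unsigned StackAlign, unsigned SpillAlign,
                               bool CanRealignStack, bool IsFixedObject) {
  // Aligned moves are legal if the incoming stack alignment already covers the
  // spill slot, or if the slot is not fixed and the frame can be realigned.
  return StackAlign >= SpillAlign || (CanRealignStack && !IsFixedObject);
}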
3976
3977void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
3978 MachineBasicBlock::iterator MI,
3979 Register DestReg, int FrameIdx,
3980 const TargetRegisterClass *RC,
3981 const TargetRegisterInfo *TRI) const {
3982 if (RC->getID() == X86::TILERegClassID) {
3983 unsigned Opc = X86::TILELOADD;
3984 // tileloadd (%sp, %idx), %tmm
3985 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
3986 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
3987 MachineInstr *NewMI =
3988 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
3989 NewMI = addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
3990 FrameIdx);
3991 MachineOperand &MO = NewMI->getOperand(3);
3992 MO.setReg(VirtReg);
3993 MO.setIsKill(true);
3994 } else {
3995 const MachineFunction &MF = *MBB.getParent();
3996 const MachineFrameInfo &MFI = MF.getFrameInfo();
3997 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
3998 bool isAligned =
3999 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
4000 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
4001 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
4002 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
4003 FrameIdx);
4004 }
4005}
4006
4007bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
4008 Register &SrcReg2, int64_t &CmpMask,
4009 int64_t &CmpValue) const {
4010 switch (MI.getOpcode()) {
4011 default: break;
4012 case X86::CMP64ri32:
4013 case X86::CMP64ri8:
4014 case X86::CMP32ri:
4015 case X86::CMP32ri8:
4016 case X86::CMP16ri:
4017 case X86::CMP16ri8:
4018 case X86::CMP8ri:
4019 SrcReg = MI.getOperand(0).getReg();
4020 SrcReg2 = 0;
4021 if (MI.getOperand(1).isImm()) {
4022 CmpMask = ~0;
4023 CmpValue = MI.getOperand(1).getImm();
4024 } else {
4025 CmpMask = CmpValue = 0;
4026 }
4027 return true;
4028 // A SUB can be used to perform a comparison.
4029 case X86::SUB64rm:
4030 case X86::SUB32rm:
4031 case X86::SUB16rm:
4032 case X86::SUB8rm:
4033 SrcReg = MI.getOperand(1).getReg();
4034 SrcReg2 = 0;
4035 CmpMask = 0;
4036 CmpValue = 0;
4037 return true;
4038 case X86::SUB64rr:
4039 case X86::SUB32rr:
4040 case X86::SUB16rr:
4041 case X86::SUB8rr:
4042 SrcReg = MI.getOperand(1).getReg();
4043 SrcReg2 = MI.getOperand(2).getReg();
4044 CmpMask = 0;
4045 CmpValue = 0;
4046 return true;
4047 case X86::SUB64ri32:
4048 case X86::SUB64ri8:
4049 case X86::SUB32ri:
4050 case X86::SUB32ri8:
4051 case X86::SUB16ri:
4052 case X86::SUB16ri8:
4053 case X86::SUB8ri:
4054 SrcReg = MI.getOperand(1).getReg();
4055 SrcReg2 = 0;
4056 if (MI.getOperand(2).isImm()) {
4057 CmpMask = ~0;
4058 CmpValue = MI.getOperand(2).getImm();
4059 } else {
4060 CmpMask = CmpValue = 0;
4061 }
4062 return true;
4063 case X86::CMP64rr:
4064 case X86::CMP32rr:
4065 case X86::CMP16rr:
4066 case X86::CMP8rr:
4067 SrcReg = MI.getOperand(0).getReg();
4068 SrcReg2 = MI.getOperand(1).getReg();
4069 CmpMask = 0;
4070 CmpValue = 0;
4071 return true;
4072 case X86::TEST8rr:
4073 case X86::TEST16rr:
4074 case X86::TEST32rr:
4075 case X86::TEST64rr:
4076 SrcReg = MI.getOperand(0).getReg();
4077 if (MI.getOperand(1).getReg() != SrcReg)
4078 return false;
4079 // Compare against zero.
4080 SrcReg2 = 0;
4081 CmpMask = ~0;
4082 CmpValue = 0;
4083 return true;
4084 }
4085 return false;
4086}
4087
4088bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
4089 Register SrcReg, Register SrcReg2,
4090 int64_t ImmMask, int64_t ImmValue,
4091 const MachineInstr &OI, bool *IsSwapped,
4092 int64_t *ImmDelta) const {
4093 switch (OI.getOpcode()) {
4094 case X86::CMP64rr:
4095 case X86::CMP32rr:
4096 case X86::CMP16rr:
4097 case X86::CMP8rr:
4098 case X86::SUB64rr:
4099 case X86::SUB32rr:
4100 case X86::SUB16rr:
4101 case X86::SUB8rr: {
4102 Register OISrcReg;
4103 Register OISrcReg2;
4104 int64_t OIMask;
4105 int64_t OIValue;
4106 if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
4107 OIMask != ImmMask || OIValue != ImmValue)
4108 return false;
4109 if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
4110 *IsSwapped = false;
4111 return true;
4112 }
4113 if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
4114 *IsSwapped = true;
4115 return true;
4116 }
4117 return false;
4118 }
4119 case X86::CMP64ri32:
4120 case X86::CMP64ri8:
4121 case X86::CMP32ri:
4122 case X86::CMP32ri8:
4123 case X86::CMP16ri:
4124 case X86::CMP16ri8:
4125 case X86::CMP8ri:
4126 case X86::SUB64ri32:
4127 case X86::SUB64ri8:
4128 case X86::SUB32ri:
4129 case X86::SUB32ri8:
4130 case X86::SUB16ri:
4131 case X86::SUB16ri8:
4132 case X86::SUB8ri:
4133 case X86::TEST64rr:
4134 case X86::TEST32rr:
4135 case X86::TEST16rr:
4136 case X86::TEST8rr: {
4137 if (ImmMask != 0) {
4138 Register OISrcReg;
4139 Register OISrcReg2;
4140 int64_t OIMask;
4141 int64_t OIValue;
4142 if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
4143 SrcReg == OISrcReg && ImmMask == OIMask) {
4144 if (OIValue == ImmValue) {
4145 *ImmDelta = 0;
4146 return true;
4147 } else if (static_cast<uint64_t>(ImmValue) ==
4148 static_cast<uint64_t>(OIValue) - 1) {
4149 *ImmDelta = -1;
4150 return true;
4151 } else if (static_cast<uint64_t>(ImmValue) ==
4152 static_cast<uint64_t>(OIValue) + 1) {
4153 *ImmDelta = 1;
4154 return true;
4155 } else {
4156 return false;
4157 }
4158 }
4159 }
4160 return FlagI.isIdenticalTo(OI);
4161 }
4162 default:
4163 return false;
4164 }
4165}
4166
4167/// Check whether the definition can be converted
4168/// to remove a comparison against zero.
4169inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
4170 bool &ClearsOverflowFlag) {
4171 NoSignFlag = false;
4172 ClearsOverflowFlag = false;
4173
4174 switch (MI.getOpcode()) {
4175 default: return false;
4176
4177 // The shift instructions only modify ZF if their shift count is non-zero.
4178 // N.B.: The processor truncates the shift count depending on the encoding.
4179 case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri:
4180 case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri:
4181 return getTruncatedShiftCount(MI, 2) != 0;
4182
4183 // Some left shift instructions can be turned into LEA instructions but only
4184 // if their flags aren't used. Avoid transforming such instructions.
4185 case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{
4186 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
4187 if (isTruncatedShiftCountForLEA(ShAmt)) return false;
4188 return ShAmt != 0;
4189 }
4190
4191 case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8:
4192 case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
4193 return getTruncatedShiftCount(MI, 3) != 0;
4194
4195 case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
4196 case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
4197 case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
4198 case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
4199 case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
4200 case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
4201 case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
4202 case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
4203 case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
4204 case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
4205 case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
4206 case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
4207 case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri:
4208 case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8:
4209 case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr:
4210 case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm:
4211 case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm:
4212 case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri:
4213 case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8:
4214 case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr:
4215 case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm:
4216 case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm:
4217 case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
4218 case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1:
4219 case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1:
4220 case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
4221 case X86::LZCNT16rr: case X86::LZCNT16rm:
4222 case X86::LZCNT32rr: case X86::LZCNT32rm:
4223 case X86::LZCNT64rr: case X86::LZCNT64rm:
4224 case X86::POPCNT16rr:case X86::POPCNT16rm:
4225 case X86::POPCNT32rr:case X86::POPCNT32rm:
4226 case X86::POPCNT64rr:case X86::POPCNT64rm:
4227 case X86::TZCNT16rr: case X86::TZCNT16rm:
4228 case X86::TZCNT32rr: case X86::TZCNT32rm:
4229 case X86::TZCNT64rr: case X86::TZCNT64rm:
4230 return true;
4231 case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
4232 case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
4233 case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
4234 case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
4235 case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
4236 case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
4237 case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
4238 case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
4239 case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
4240 case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
4241 case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
4242 case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
4243 case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
4244 case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
4245 case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
4246 case X86::ANDN32rr: case X86::ANDN32rm:
4247 case X86::ANDN64rr: case X86::ANDN64rm:
4248 case X86::BLSI32rr: case X86::BLSI32rm:
4249 case X86::BLSI64rr: case X86::BLSI64rm:
4250 case X86::BLSMSK32rr: case X86::BLSMSK32rm:
4251 case X86::BLSMSK64rr: case X86::BLSMSK64rm:
4252 case X86::BLSR32rr: case X86::BLSR32rm:
4253 case X86::BLSR64rr: case X86::BLSR64rm:
4254 case X86::BLCFILL32rr: case X86::BLCFILL32rm:
4255 case X86::BLCFILL64rr: case X86::BLCFILL64rm:
4256 case X86::BLCI32rr: case X86::BLCI32rm:
4257 case X86::BLCI64rr: case X86::BLCI64rm:
4258 case X86::BLCIC32rr: case X86::BLCIC32rm:
4259 case X86::BLCIC64rr: case X86::BLCIC64rm:
4260 case X86::BLCMSK32rr: case X86::BLCMSK32rm:
4261 case X86::BLCMSK64rr: case X86::BLCMSK64rm:
4262 case X86::BLCS32rr: case X86::BLCS32rm:
4263 case X86::BLCS64rr: case X86::BLCS64rm:
4264 case X86::BLSFILL32rr: case X86::BLSFILL32rm:
4265 case X86::BLSFILL64rr: case X86::BLSFILL64rm:
4266 case X86::BLSIC32rr: case X86::BLSIC32rm:
4267 case X86::BLSIC64rr: case X86::BLSIC64rm:
4268 case X86::BZHI32rr: case X86::BZHI32rm:
4269 case X86::BZHI64rr: case X86::BZHI64rm:
4270 case X86::T1MSKC32rr: case X86::T1MSKC32rm:
4271 case X86::T1MSKC64rr: case X86::T1MSKC64rm:
4272 case X86::TZMSK32rr: case X86::TZMSK32rm:
4273 case X86::TZMSK64rr: case X86::TZMSK64rm:
4274 // These instructions clear the overflow flag just like TEST.
4275 // FIXME: These are not the only instructions in this switch that clear the
4276 // overflow flag.
4277 ClearsOverflowFlag = true;
4278 return true;
4279 case X86::BEXTR32rr: case X86::BEXTR64rr:
4280 case X86::BEXTR32rm: case X86::BEXTR64rm:
4281 case X86::BEXTRI32ri: case X86::BEXTRI32mi:
4282 case X86::BEXTRI64ri: case X86::BEXTRI64mi:
4283 // BEXTR doesn't update the sign flag so we can't use it. It does clear
4284 // the overflow flag, but that's not useful without the sign flag.
4285 NoSignFlag = true;
4286 return true;
4287 }
4288}
4289
4290/// Check whether the use can be converted to remove a comparison against zero.
4291static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
4292 switch (MI.getOpcode()) {
4293 default: return X86::COND_INVALID;
4294 case X86::NEG8r:
4295 case X86::NEG16r:
4296 case X86::NEG32r:
4297 case X86::NEG64r:
4298 return X86::COND_AE;
4299 case X86::LZCNT16rr:
4300 case X86::LZCNT32rr:
4301 case X86::LZCNT64rr:
4302 return X86::COND_B;
4303 case X86::POPCNT16rr:
4304 case X86::POPCNT32rr:
4305 case X86::POPCNT64rr:
4306 return X86::COND_E;
4307 case X86::TZCNT16rr:
4308 case X86::TZCNT32rr:
4309 case X86::TZCNT64rr:
4310 return X86::COND_B;
4311 case X86::BSF16rr:
4312 case X86::BSF32rr:
4313 case X86::BSF64rr:
4314 case X86::BSR16rr:
4315 case X86::BSR32rr:
4316 case X86::BSR64rr:
4317 return X86::COND_E;
4318 case X86::BLSI32rr:
4319 case X86::BLSI64rr:
4320 return X86::COND_AE;
4321 case X86::BLSR32rr:
4322 case X86::BLSR64rr:
4323 case X86::BLSMSK32rr:
4324 case X86::BLSMSK64rr:
4325 return X86::COND_B;
4326 // TODO: TBM instructions.
4327 }
4328}
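// A minimal sketch for the POPCNT mapping above, assuming %ecx is otherwise
// unused and EFLAGS is not clobbered in between:
//   popcntl %eax, %ecx   ; ZF is set iff %eax is zero
//   ...
//   testl   %eax, %eax   ; <-- removable; je/jne keep using ZF from the popcnt
//   je      .LBB0_2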
4329
4330/// Check if there exists an earlier instruction that
4331/// operates on the same source operands and sets flags in the same way as
4332/// Compare; remove Compare if possible.
4333bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
4334 Register SrcReg2, int64_t CmpMask,
4335 int64_t CmpValue,
4336 const MachineRegisterInfo *MRI) const {
4337 // Check whether we can replace SUB with CMP.
4338 switch (CmpInstr.getOpcode()) {
4339 default: break;
4340 case X86::SUB64ri32:
4341 case X86::SUB64ri8:
4342 case X86::SUB32ri:
4343 case X86::SUB32ri8:
4344 case X86::SUB16ri:
4345 case X86::SUB16ri8:
4346 case X86::SUB8ri:
4347 case X86::SUB64rm:
4348 case X86::SUB32rm:
4349 case X86::SUB16rm:
4350 case X86::SUB8rm:
4351 case X86::SUB64rr:
4352 case X86::SUB32rr:
4353 case X86::SUB16rr:
4354 case X86::SUB8rr: {
4355 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
4356 return false;
4357 // There is no use of the destination register, so we can replace SUB with CMP.
4358 unsigned NewOpcode = 0;
4359 switch (CmpInstr.getOpcode()) {
4360 default: llvm_unreachable("Unreachable!");
4361 case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
4362 case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
4363 case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
4364 case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
4365 case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
4366 case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
4367 case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
4368 case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
4369 case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
4370 case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
4371 case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
4372 case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
4373 case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
4374 case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
4375 case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
4376 }
4377 CmpInstr.setDesc(get(NewOpcode));
4378 CmpInstr.RemoveOperand(0);
4379 // Mutating this instruction invalidates any debug data associated with it.
4380 CmpInstr.dropDebugNumber();
4381 // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
4382 if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
4383 NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
4384 return false;
4385 }
4386 }
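// A minimal MIR-level sketch of the SUB -> CMP rewrite above, assuming the
// subtraction result %2 has no non-debug uses:
//   %2:gr32 = SUB32rr %0, %1, implicit-def $eflags
// is turned into
//   CMP32rr %0, %1, implicit-def $eflags
// and, for the rr/ri forms, the code below then tries to remove the CMP too.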
4387
4388 // The following code tries to remove the comparison by re-using EFLAGS
4389 // from earlier instructions.
4390
4391 bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
4392
4393 // Transformation currently requires SSA values.
4394 if (SrcReg2.isPhysical())
4395 return false;
4396 MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
4397 assert(SrcRegDef && "Must have a definition (SSA)");
4398
4399 MachineInstr *MI = nullptr;
4400 MachineInstr *Sub = nullptr;
4401 MachineInstr *Movr0Inst = nullptr;
4402 bool NoSignFlag = false;
4403 bool ClearsOverflowFlag = false;
4404 bool ShouldUpdateCC = false;
4405 bool IsSwapped = false;
4406 X86::CondCode NewCC = X86::COND_INVALID;
4407 int64_t ImmDelta = 0;
4408
4409 // Search backward from CmpInstr for the next instruction defining EFLAGS.
4410 const TargetRegisterInfo *TRI = &getRegisterInfo();
4411 MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
4412 MachineBasicBlock::reverse_iterator From =
4413 std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
4414 for (MachineBasicBlock *MBB = &CmpMBB;;) {
4415 for (MachineInstr &Inst : make_range(From, MBB->rend())) {
4416 // Try to use EFLAGS from the instruction defining %SrcReg. Example:
4417 // %eax = addl ...
4418 // ... // EFLAGS not changed
4419 // testl %eax, %eax // <-- can be removed
4420 if (&Inst == SrcRegDef) {
4421 if (IsCmpZero &&
4422 isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
4423 MI = &Inst;
4424 break;
4425 }
4426 // Cannot find other candidates before definition of SrcReg.
4427 return false;
4428 }
4429
4430 if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
4431 // Try to use EFLAGS produced by an instruction reading %SrcReg.
4432 // Example:
4433 // %eax = ...
4434 // ...
4435 // popcntl %eax
4436 // ... // EFLAGS not changed
4437 // testl %eax, %eax // <-- can be removed
4438 if (IsCmpZero) {
4439 NewCC = isUseDefConvertible(Inst);
4440 if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
4441 Inst.getOperand(1).getReg() == SrcReg) {
4442 ShouldUpdateCC = true;
4443 MI = &Inst;
4444 break;
4445 }
4446 }
4447
4448 // Try to use EFLAGS from an instruction with similar flag results.
4449 // Example:
4450 // sub x, y or cmp x, y
4451 // ... // EFLAGS not changed
4452 // cmp x, y // <-- can be removed
4453 if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
4454 Inst, &IsSwapped, &ImmDelta)) {
4455 Sub = &Inst;
4456 break;
4457 }
4458
4459 // MOV32r0 is implemented with xor, which clobbers the condition code. It is
4460 // safe to move it up if its definition of EFLAGS is dead and earlier
4461 // instructions do not read or write EFLAGS.
4462 if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
4463 Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
4464 Movr0Inst = &Inst;
4465 continue;
4466 }
4467
4468 // Cannot do anything for any other EFLAG changes.
4469 return false;
4470 }
4471 }
4472
4473 if (MI || Sub)
4474 break;
4475
4476 // Reached the beginning of the basic block. Continue in the predecessor if
4477 // there is exactly one.
4478 if (MBB->pred_size() != 1)
4479 return false;
4480 MBB = *MBB->pred_begin();
4481 From = MBB->rbegin();
4482 }
4483
4484 // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
4485 // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
4486 // If we are done with the basic block, we need to check whether EFLAGS is
4487 // live-out.
4488 bool FlagsMayLiveOut = true;
4489 SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
4490 MachineBasicBlock::iterator AfterCmpInstr =
4491 std::next(MachineBasicBlock::iterator(CmpInstr));
4492 for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
4493 bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
4494 bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
4495 // We should check the usage if this instruction uses and updates EFLAGS.
4496 if (!UseEFLAGS && ModifyEFLAGS) {
4497 // It is safe to remove CmpInstr if EFLAGS is updated again.
4498 FlagsMayLiveOut = false;
4499 break;
4500 }
4501 if (!UseEFLAGS && !ModifyEFLAGS)
4502 continue;
4503
4504 // EFLAGS is used by this instruction.
4505 X86::CondCode OldCC = X86::COND_INVALID;
4506 if (MI || IsSwapped || ImmDelta != 0) {
4507 // We decode the condition code from the opcode.
4508 if (Instr.isBranch())
4509 OldCC = X86::getCondFromBranch(Instr);
4510 else {
4511 OldCC = X86::getCondFromSETCC(Instr);
4512 if (OldCC == X86::COND_INVALID)
4513 OldCC = X86::getCondFromCMov(Instr);
4514 }
4515 if (OldCC == X86::COND_INVALID) return false;
4516 }
4517 X86::CondCode ReplacementCC = X86::COND_INVALID;
4518 if (MI) {
4519 switch (OldCC) {
4520 default: break;
4521 case X86::COND_A: case X86::COND_AE:
4522 case X86::COND_B: case X86::COND_BE:
4523 // CF is used, we can't perform this optimization.
4524 return false;
4525 case X86::COND_G: case X86::COND_GE:
4526 case X86::COND_L: case X86::COND_LE:
4527 case X86::COND_O: case X86::COND_NO:
4528 // If OF is used, the instruction needs to clear it like CmpZero does.
4529 if (!ClearsOverflowFlag)
4530 return false;
4531 break;
4532 case X86::COND_S: case X86::COND_NS:
4533 // If SF is used, but the instruction doesn't update the SF, then we
4534 // can't do the optimization.
4535 if (NoSignFlag)
4536 return false;
4537 break;
4538 }
4539
4540 // If we're updating the condition code, check if we have to reverse the
4541 // condition.
4542 if (ShouldUpdateCC)
4543 switch (OldCC) {
4544 default:
4545 return false;
4546 case X86::COND_E:
4547 ReplacementCC = NewCC;
4548 break;
4549 case X86::COND_NE:
4550 ReplacementCC = GetOppositeBranchCondition(NewCC);
4551 break;
4552 }
4553 } else if (IsSwapped) {
4554 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
4555 // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
4556 // We swap the condition code and synthesize the new opcode.
4557 ReplacementCC = getSwappedCondition(OldCC);
4558 if (ReplacementCC == X86::COND_INVALID)
4559 return false;
4560 ShouldUpdateCC = true;
4561 } else if (ImmDelta != 0) {
4562 unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
4563 // Shift amount for min/max constants to adjust for 8/16/32 instruction
4564 // sizes.
4565 switch (OldCC) {
4566 case X86::COND_L: // x <s (C + 1) --> x <=s C
4567 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
4568 return false;
4569 ReplacementCC = X86::COND_LE;
4570 break;
4571 case X86::COND_B: // x <u (C + 1) --> x <=u C
4572 if (ImmDelta != 1 || CmpValue == 0)
4573 return false;
4574 ReplacementCC = X86::COND_BE;
4575 break;
4576 case X86::COND_GE: // x >=s (C + 1) --> x >s C
4577 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
4578 return false;
4579 ReplacementCC = X86::COND_G;
4580 break;
4581 case X86::COND_AE: // x >=u (C + 1) --> x >u C
4582 if (ImmDelta != 1 || CmpValue == 0)
4583 return false;
4584 ReplacementCC = X86::COND_A;
4585 break;
4586 case X86::COND_G: // x >s (C - 1) --> x >=s C
4587 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
4588 return false;
4589 ReplacementCC = X86::COND_GE;
4590 break;
4591 case X86::COND_A: // x >u (C - 1) --> x >=u C
4592 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
4593 return false;
4594 ReplacementCC = X86::COND_AE;
4595 break;
4596 case X86::COND_LE: // x <=s (C - 1) --> x <s C
4597 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
4598 return false;
4599 ReplacementCC = X86::COND_L;
4600 break;
4601 case X86::COND_BE: // x <=u (C - 1) --> x <u C
4602 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
4603 return false;
4604 ReplacementCC = X86::COND_B;
4605 break;
4606 default:
4607 return false;
4608 }
4609 ShouldUpdateCC = true;
4610 }
4611
4612 if (ShouldUpdateCC && ReplacementCC != OldCC) {
4613 // Push the MachineInstr to OpsToUpdate.
4614 // If it is safe to remove CmpInstr, the condition code of these
4615 // instructions will be modified.
4616 OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
4617 }
4618 if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
4619 // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
4620 FlagsMayLiveOut = false;
4621 break;
4622 }
4623 }
4624
4625 // If we have to update users but EFLAGS is live-out, abort, since we cannot
4626 // easily find all of the users.
4627 if ((MI != nullptr || ShouldUpdateCC) && FlagsMayLiveOut) {
4628 for (MachineBasicBlock *Successor : CmpMBB.successors())
4629 if (Successor->isLiveIn(X86::EFLAGS))
4630 return false;
4631 }
4632
4633 // The instruction to be updated is either Sub or MI.
4634 assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
4635 Sub = MI != nullptr ? MI : Sub;
4636 MachineBasicBlock *SubBB = Sub->getParent();
4637 // Move Movr0Inst to the appropriate place before Sub.
4638 if (Movr0Inst) {
4639 // Only move within the same block so we don't accidentally move to a
4640 // block with higher execution frequency.
4641 if (&CmpMBB != SubBB)
4642 return false;
4643 // Look backwards until we find a def that doesn't use the current EFLAGS.
4644 MachineBasicBlock::reverse_iterator InsertI = Sub,
4645 InsertE = Sub->getParent()->rend();
4646 for (; InsertI != InsertE; ++InsertI) {
4647 MachineInstr *Instr = &*InsertI;
4648 if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
4649 Instr->modifiesRegister(X86::EFLAGS, TRI)) {
4650 Movr0Inst->getParent()->remove(Movr0Inst);
4651 Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
4652 Movr0Inst);
4653 break;
4654 }
4655 }
4656 if (InsertI == InsertE)
4657 return false;
4658 }
4659
4660 // Make sure Sub instruction defines EFLAGS and mark the def live.
4661 MachineOperand *FlagDef = Sub->findRegisterDefOperand(X86::EFLAGS);
4662 assert(FlagDef && "Unable to locate a def EFLAGS operand");
4663 FlagDef->setIsDead(false);
4664
4665 CmpInstr.eraseFromParent();
4666
4667 // Modify the condition code of instructions in OpsToUpdate.
4668 for (auto &Op : OpsToUpdate) {
4669 Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
4670 .setImm(Op.second);
4671 }
4672 // Add EFLAGS to block live-ins between CmpBB and block of flags producer.
4673 for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
4674 MBB = *MBB->pred_begin()) {
4675 assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
4676 if (!MBB->isLiveIn(X86::EFLAGS))
4677 MBB->addLiveIn(X86::EFLAGS);
4678 }
4679 return true;
4680}
4681
4682/// Try to remove the load by folding it to a register
4683/// operand at the use. We fold the load if it defines a virtual
4684/// register, the virtual register is used exactly once in the same BB, and the
4685/// instructions in between do not load or store and have no side effects.
4686MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
4687 const MachineRegisterInfo *MRI,
4688 Register &FoldAsLoadDefReg,
4689 MachineInstr *&DefMI) const {
4690 // Check whether we can move DefMI here.
4691 DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
4692 assert(DefMI);
4693 bool SawStore = false;
4694 if (!DefMI->isSafeToMove(nullptr, SawStore))
4695 return nullptr;
4696
4697 // Collect information about virtual register operands of MI.
4698 SmallVector<unsigned, 1> SrcOperandIds;
4699 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
4700 MachineOperand &MO = MI.getOperand(i);
4701 if (!MO.isReg())
4702 continue;
4703 Register Reg = MO.getReg();
4704 if (Reg != FoldAsLoadDefReg)
4705 continue;
4706 // Do not fold if we have a subreg use or a def.
4707 if (MO.getSubReg() || MO.isDef())
4708 return nullptr;
4709 SrcOperandIds.push_back(i);
4710 }
4711 if (SrcOperandIds.empty())
4712 return nullptr;
4713
4714 // Check whether we can fold the def into SrcOperandId.
4715 if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
4716 FoldAsLoadDefReg = 0;
4717 return FoldMI;
4718 }
4719
4720 return nullptr;
4721}
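// A minimal MIR-level sketch of the fold, assuming a single-use load from a
// stack slot feeding a foldable arithmetic instruction:
//   %1:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg
//   %2:gr32 = ADD32rr %0, %1, implicit-def dead $eflags
// is replaced via foldMemoryOperand with
//   %2:gr32 = ADD32rm %0, %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $eflags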
4722
4723/// Expand a single-def pseudo instruction to a two-addr
4724/// instruction with two undef reads of the register being defined.
4725/// This is used for mapping:
4726/// %xmm4 = V_SET0
4727/// to:
4728/// %xmm4 = PXORrr undef %xmm4, undef %xmm4
4729///
4730static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
4731 const MCInstrDesc &Desc) {
4732 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
4733 Register Reg = MIB.getReg(0);
4734 MIB->setDesc(Desc);
4735
4736 // MachineInstr::addOperand() will insert explicit operands before any
4737 // implicit operands.
4738 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
4739 // But we don't trust that.
4740 assert(MIB.getReg(1) == Reg &&
4741 MIB.getReg(2) == Reg && "Misplaced operand");
4742 return true;
4743}
4744
4745/// Expand a single-def pseudo instruction to a two-addr
4746/// instruction with two %k0 reads.
4747/// This is used for mapping:
4748/// %k4 = K_SET1
4749/// to:
4750/// %k4 = KXNORrr %k0, %k0
4751static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
4752 Register Reg) {
4753 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
4754 MIB->setDesc(Desc);
4755 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
4756 return true;
4757}
4758
4759static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
4760 bool MinusOne) {
4761 MachineBasicBlock &MBB = *MIB->getParent();
4762 const DebugLoc &DL = MIB->getDebugLoc();
4763 Register Reg = MIB.getReg(0);
4764
4765 // Insert the XOR.
4766 BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
4767 .addReg(Reg, RegState::Undef)
4768 .addReg(Reg, RegState::Undef);
4769
4770 // Turn the pseudo into an INC or DEC.
4771 MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
4772 MIB.addReg(Reg);
4773
4774 return true;
4775}
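// A minimal sketch of the resulting sequences, assuming EAX as the destination:
//   MOV32r1  ->  xorl %eax, %eax ; incl %eax     (materializes 1)
//   MOV32r_1 ->  xorl %eax, %eax ; decl %eax     (materializes -1)
// The XOR is a recognized zeroing idiom, so neither form needs an immediate.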
4776
4777static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
4778 const TargetInstrInfo &TII,
4779 const X86Subtarget &Subtarget) {
4780 MachineBasicBlock &MBB = *MIB->getParent();
4781 const DebugLoc &DL = MIB->getDebugLoc();
4782 int64_t Imm = MIB->getOperand(1).getImm();
4783 assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
4784 MachineBasicBlock::iterator I = MIB.getInstr();
4785
4786 int StackAdjustment;
4787
4788 if (Subtarget.is64Bit()) {
4789 assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
4790 MIB->getOpcode() == X86::MOV32ImmSExti8);
4791
4792 // Can't use push/pop lowering if the function might write to the red zone.
4793 X86MachineFunctionInfo *X86FI =
4794 MBB.getParent()->getInfo<X86MachineFunctionInfo>();
4795 if (X86FI->getUsesRedZone()) {
4796 MIB->setDesc(TII.get(MIB->getOpcode() ==
4797 X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri));
4798 return true;
4799 }
4800
4801 // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
4802 // widen the register if necessary.
4803 StackAdjustment = 8;
4804 BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
4805 MIB->setDesc(TII.get(X86::POP64r));
4806 MIB->getOperand(0)
4807 .setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
4808 } else {
4809 assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
4810 StackAdjustment = 4;
4811 BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
4812 MIB->setDesc(TII.get(X86::POP32r));
4813 }
4814 MIB->RemoveOperand(1);
4815 MIB->addImplicitDefUseOperands(*MBB.getParent());
4816
4817 // Build CFI if necessary.
4818 MachineFunction &MF = *MBB.getParent();
4819 const X86FrameLowering *TFL = Subtarget.getFrameLowering();
4820 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
4821 bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
4822 bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
4823 if (EmitCFI) {
4824 TFL->BuildCFI(MBB, I, DL,
4825 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
4826 TFL->BuildCFI(MBB, std::next(I), DL,
4827 MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
4828 }
4829
4830 return true;
4831}
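// A minimal sketch of the push/pop lowering, assuming 64-bit mode, RAX as the
// destination and no red-zone use:
//   MOV64ImmSExti8 $-1  ->  pushq $-1
//                           popq  %rax
// This is shorter than movq $-1, %rax; the CFI adjustments above keep the
// unwind information consistent across the temporary stack bump.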
4832
4833// LoadStackGuard has so far only been implemented for 64-bit MachO. A
4834// different code sequence is needed for other targets.
4835static void expandLoadStackGuard(MachineInstrBuilder &MIB,
4836 const TargetInstrInfo &TII) {
4837 MachineBasicBlock &MBB = *MIB->getParent();
4838 const DebugLoc &DL = MIB->getDebugLoc();
4839 Register Reg = MIB.getReg(0);
4840 const GlobalValue *GV =
4841 cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
4842 auto Flags = MachineMemOperand::MOLoad |
4843 MachineMemOperand::MODereferenceable |
4844 MachineMemOperand::MOInvariant;
4845 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4846 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, Align(8));
4847 MachineBasicBlock::iterator I = MIB.getInstr();
4848
4849 BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
4850 .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
4851 .addMemOperand(MMO);
4852 MIB->setDebugLoc(DL);
4853 MIB->setDesc(TII.get(X86::MOV64rm));
4854 MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
4855}
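// A minimal sketch of the expansion, assuming the usual MachO guard symbol
// (the actual global is taken from the pseudo's memory operand) and RAX as
// the destination:
//   movq ___stack_chk_guard@GOTPCREL(%rip), %rax
//   movq (%rax), %rax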
4856
4857static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
4858 MachineBasicBlock &MBB = *MIB->getParent();
4859 MachineFunction &MF = *MBB.getParent();
4860 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
4861 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4862 unsigned XorOp =
4863 MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
4864 MIB->setDesc(TII.get(XorOp));
4865 MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef);
4866 return true;
4867}
4868
4869// This is used to handle spills for 128/256-bit registers when we have AVX512,
4870// but not VLX. If the spill uses an extended register, we need an instruction
4871// that loads the lower 128/256 bits but is available with only AVX512F.
4872static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
4873 const TargetRegisterInfo *TRI,
4874 const MCInstrDesc &LoadDesc,
4875 const MCInstrDesc &BroadcastDesc,
4876 unsigned SubIdx) {
4877 Register DestReg = MIB.getReg(0);
4878 // Check if DestReg is XMM16-31 or YMM16-31.
4879 if (TRI->getEncodingValue(DestReg) < 16) {
4880 // We can use a normal VEX encoded load.
4881 MIB->setDesc(LoadDesc);
4882 } else {
4883 // Use a 128/256-bit VBROADCAST instruction.
4884 MIB->setDesc(BroadcastDesc);
4885 // Change the destination to a 512-bit register.
4886 DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
4887 MIB->getOperand(0).setReg(DestReg);
4888 }
4889 return true;
4890}
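// A minimal sketch, assuming a plain (%rdi) address:
//   VMOVAPSZ128rm_NOVLX into %xmm3   ->  vmovaps (%rdi), %xmm3
//   VMOVAPSZ128rm_NOVLX into %xmm16  ->  vbroadcastf32x4 (%rdi), %zmm16
// In the second form only the low 128 bits matter; the broadcast is just an
// AVX512F-encodable way to reach the extended register.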
4891
4892// This is used to handle spills for 128/256-bit registers when we have AVX512,
4893// but not VLX. If the spill uses an extended register, we need an instruction
4894// that stores the lower 128/256 bits but is available with only AVX512F.
4895static bool expandNOVLXStore(MachineInstrBuilder &MIB,
4896 const TargetRegisterInfo *TRI,
4897 const MCInstrDesc &StoreDesc,
4898 const MCInstrDesc &ExtractDesc,
4899 unsigned SubIdx) {
4900 Register SrcReg = MIB.getReg(X86::AddrNumOperands);
4901 // Check if SrcReg is XMM16-31 or YMM16-31.
4902 if (TRI->getEncodingValue(SrcReg) < 16) {
4903 // We can use a normal VEX encoded store.
4904 MIB->setDesc(StoreDesc);
4905 } else {
4906 // Use a VEXTRACTF instruction.
4907 MIB->setDesc(ExtractDesc);
4908 // Change the source to a 512-bit register.
4909 SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
4910 MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
4911 MIB.addImm(0x0); // Append immediate to extract from the lower bits.
4912 }
4913
4914 return true;
4915}
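// A minimal sketch for the store direction, assuming a plain (%rdi) address:
//   VMOVAPSZ128mr_NOVLX from %xmm3   ->  vmovaps %xmm3, (%rdi)
//   VMOVAPSZ128mr_NOVLX from %xmm16  ->  vextractf32x4 $0, %zmm16, (%rdi)
// Extract index 0 writes the low 128 bits, matching the VEX-encoded store.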
4916
4917static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
4918 MIB->setDesc(Desc);
4919 int64_t ShiftAmt = MIB->getOperand(2).getImm();
4920 // Temporarily remove the immediate so we can add another source register.
4921 MIB->RemoveOperand(2);
4922 // Add the register. Don't copy the kill flag if there is one.
4923 MIB.addReg(MIB.getReg(1),
4924 getUndefRegState(MIB->getOperand(1).isUndef()));
4925 // Add back the immediate.
4926 MIB.addImm(ShiftAmt);
4927 return true;
4928}
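// A minimal sketch of the expansion, assuming a 32-bit rotate pseudo on EAX:
//   SHLDROT32ri %eax, 5  ->  shldl $5, %eax, %eax
// With both register operands equal, SHLD shifts the register's own high bits
// back in, i.e. it behaves as a rotate left by the immediate.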
4929
4930bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
4931 bool HasAVX = Subtarget.hasAVX();
4932 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4933 switch (MI.getOpcode()) {
4934 case X86::MOV32r0:
4935 return Expand2AddrUndef(MIB, get(X86::XOR32rr));
4936 case X86::MOV32r1:
4937 return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
4938 case X86::MOV32r_1:
4939 return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
4940 case X86::MOV32ImmSExti8:
4941 case X86::MOV64ImmSExti8:
4942 return ExpandMOVImmSExti8(MIB, *this, Subtarget);
4943 case X86::SETB_C32r:
4944 return Expand2AddrUndef(MIB, get(X86::SBB32rr));
4945 case X86::SETB_C64r:
4946 return Expand2AddrUndef(MIB, get(X86::SBB64rr));
4947 case X86::MMX_SET0:
4948 return Expand2AddrUndef(MIB, get(X86::MMX_PXORrr));
4949 case X86::V_SET0:
4950 case X86::FsFLD0SS:
4951 case X86::FsFLD0SD:
4952 case X86::FsFLD0F128:
4953 return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
4954 case X86::AVX_SET0: {
4955 assert(HasAVX && "AVX not supported");
4956 const TargetRegisterInfo *TRI = &getRegisterInfo();
4957 Register SrcReg = MIB.getReg(0);
4958 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
4959 MIB->getOperand(0).setReg(XReg);
4960 Expand2AddrUndef(MIB, get(X86::VXORPSrr));
4961 MIB.addReg(SrcReg, RegState::ImplicitDefine);
4962 return true;
4963 }
4964 case X86::AVX512_128_SET0:
4965 case X86::AVX512_FsFLD0SH:
4966 case X86::AVX512_FsFLD0SS:
4967 case X86::AVX512_FsFLD0SD:
4968 case X86::AVX512_FsFLD0F128: {
4969 bool HasVLX = Subtarget.hasVLX();
4970 Register SrcReg = MIB.getReg(0);
4971 const TargetRegisterInfo *TRI = &getRegisterInfo();
4972 if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
4973 return Expand2AddrUndef(MIB,
4974 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
4975 // Extended register without VLX. Use a larger XOR.
4976 SrcReg =
4977 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
4978 MIB->getOperand(0).setReg(SrcReg);
4979 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
4980 }
4981 case X86::AVX512_256_SET0:
4982 case X86::AVX512_512_SET0: {
4983 bool HasVLX = Subtarget.hasVLX();
4984 Register SrcReg = MIB.getReg(0);
4985 const TargetRegisterInfo *TRI = &getRegisterInfo();
4986 if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
4987 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
4988 MIB->getOperand(0).setReg(XReg);
4989 Expand2AddrUndef(MIB,
4990 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
4991 MIB.addReg(SrcReg, RegState::ImplicitDefine);
4992 return true;
4993 }
4994 if (MI.getOpcode() == X86::AVX512_256_SET0) {
4995 // No VLX so we must reference a zmm.
4996 unsigned ZReg =
4997 TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
4998 MIB->getOperand(0).setReg(ZReg);
4999 }
5000 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
5001 }
5002 case X86::V_SETALLONES:
5003 return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
5004 case X86::AVX2_SETALLONES:
5005 return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
5006 case X86::AVX1_SETALLONES: {
5007 Register Reg = MIB.getReg(0);
5008 // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
5009 MIB->setDesc(get(X86::VCMPPSYrri));
5010 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
5011 return true;
5012 }
5013 case X86::AVX512_512_SETALLONES: {
5014 Register Reg = MIB.getReg(0);
5015 MIB->setDesc(get(X86::VPTERNLOGDZrri));
5016 // VPTERNLOGD needs 3 register inputs and an immediate.
5017 // 0xff will return 1s for any input.
5018 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
5019 .addReg(Reg, RegState::Undef).addImm(0xff);
5020 return true;
5021 }
5022 case X86::AVX512_512_SEXT_MASK_32:
5023 case X86::AVX512_512_SEXT_MASK_64: {
5024 Register Reg = MIB.getReg(0);
5025 Register MaskReg = MIB.getReg(1);
5026 unsigned MaskState = getRegState(MIB->getOperand(1));
5027 unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
5028 X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
5029 MI.RemoveOperand(1);
5030 MIB->setDesc(get(Opc));
5031 // VPTERNLOG needs 3 register inputs and an immediate.
5032 // 0xff will return 1s for any input.
5033 MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
5034 .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
5035 return true;
5036 }
5037 case X86::VMOVAPSZ128rm_NOVLX:
5038 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
5039 get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
5040 case X86::VMOVUPSZ128rm_NOVLX:
5041 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
5042 get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
5043 case X86::VMOVAPSZ256rm_NOVLX:
5044 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
5045 get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
5046 case X86::VMOVUPSZ256rm_NOVLX:
5047 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
5048 get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
5049 case X86::VMOVAPSZ128mr_NOVLX:
5050 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
5051 get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
5052 case X86::VMOVUPSZ128mr_NOVLX:
5053 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
5054 get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
5055 case X86::VMOVAPSZ256mr_NOVLX:
5056 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
5057 get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
5058 case X86::VMOVUPSZ256mr_NOVLX:
5059 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
5060 get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
5061 case X86::MOV32ri64: {
5062 Register Reg = MIB.getReg(0);
5063 Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
5064 MI.setDesc(get(X86::MOV32ri));
5065 MIB->getOperand(0).setReg(Reg32);
5066 MIB.addReg(Reg, RegState::ImplicitDefine);
5067 return true;
5068 }
5069
5070 // KNL does not recognize dependency-breaking idioms for mask registers,
5071 // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
5072 // Using %k0 as the undef input register is a performance heuristic based
5073 // on the assumption that %k0 is used less frequently than the other mask
5074 // registers, since it is not usable as a write mask.
5075 // FIXME: A more advanced approach would be to choose the best input mask
5076 // register based on context.
5077 case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
5078 case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
5079 case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
5080 case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
5081 case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
5082 case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
5083 case TargetOpcode::LOAD_STACK_GUARD:
5084 expandLoadStackGuard(MIB, *this);
5085 return true;
5086 case X86::XOR64_FP:
5087 case X86::XOR32_FP:
5088 return expandXorFP(MIB, *this);
5089 case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
5090 case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
5091 case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
5092 case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
5093 case X86::ADD8rr_DB: MIB->setDesc(get(X86::OR8rr)); break;
5094 case X86::ADD16rr_DB: MIB->setDesc(get(X86::OR16rr)); break;
5095 case X86::ADD32rr_DB: MIB->setDesc(get(X86::OR32rr)); break;
5096 case X86::ADD64rr_DB: MIB->setDesc(get(X86::OR64rr)); break;
5097 case X86::ADD8ri_DB: MIB->setDesc(get(X86::OR8ri)); break;
5098 case X86::ADD16ri_DB: MIB->setDesc(get(X86::OR16ri)); break;
5099 case X86::ADD32ri_DB: MIB->setDesc(get(X86::OR32ri)); break;
5100 case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break;
5101 case X86::ADD16ri8_DB: MIB->setDesc(get(X86::OR16ri8)); break;
5102 case X86::ADD32ri8_DB: MIB->setDesc(get(X86::OR32ri8)); break;
5103 case X86::ADD64ri8_DB: MIB->setDesc(get(X86::OR64ri8)); break;
5104 }
5105 return false;
5106}
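// For the ADD*_DB pseudos above: these are presumably selected from or/add
// patterns whose operands have no set bits in common, so lowering them to the
// corresponding OR produces the same value. A minimal sketch, assuming EDI and
// ESI operands with disjoint bits:
//   ADD32rr_DB %edi, %esi  ->  orl %esi, %edi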
5107
5108/// Return true for all instructions that only update
5109/// the first 32 or 64 bits of the destination register and leave the rest
5110/// unmodified. This can be used to avoid folding loads if the instructions
5111/// only update part of the destination register, and the non-updated part is
5112/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
5113/// instructions breaks the partial register dependency and can improve
5114/// performance. e.g.:
5115///
5116/// movss (%rdi), %xmm0
5117/// cvtss2sd %xmm0, %xmm0
5118///
5119/// Instead of
5120/// cvtss2sd (%rdi), %xmm0
5121///
5122/// FIXME: This should be turned into a TSFlags.
5123///
5124static bool hasPartialRegUpdate(unsigned Opcode,
5125 const X86Subtarget &Subtarget,
5126 bool ForLoadFold = false) {
5127 switch (Opcode) {
5128 case X86::CVTSI2SSrr:
5129 case X86::CVTSI2SSrm:
5130 case X86::CVTSI642SSrr:
5131 case X86::CVTSI642SSrm:
5132 case X86::CVTSI2SDrr:
5133 case X86::CVTSI2SDrm:
5134 case X86::CVTSI642SDrr:
5135 case X86::CVTSI642SDrm:
5136 // Load folding won't affect the undef register update since the input is
5137 // a GPR.
5138 return !ForLoadFold;
5139 case X86::CVTSD2SSrr:
5140 case X86::CVTSD2SSrm:
5141 case X86::CVTSS2SDrr:
5142 case X86::CVTSS2SDrm:
5143 case X86::MOVHPDrm:
5144 case X86::MOVHPSrm:
5145 case X86::MOVLPDrm:
5146 case X86::MOVLPSrm:
5147 case X86::RCPSSr:
5148 case X86::RCPSSm:
5149 case X86::RCPSSr_Int:
5150 case X86::RCPSSm_Int:
5151 case X86::ROUNDSDr:
5152 case X86::ROUNDSDm:
5153 case X86::ROUNDSSr:
5154 case X86::ROUNDSSm:
5155 case X86::RSQRTSSr:
5156 case X86::RSQRTSSm:
5157 case X86::RSQRTSSr_Int:
5158 case X86::RSQRTSSm_Int:
5159 case X86::SQRTSSr:
5160 case X86::SQRTSSm:
5161 case X86::SQRTSSr_Int:
5162 case X86::SQRTSSm_Int:
5163 case X86::SQRTSDr:
5164 case X86::SQRTSDm:
5165 case X86::SQRTSDr_Int:
5166 case X86::SQRTSDm_Int:
5167 return true;
5168 // GPR
5169 case X86::POPCNT32rm:
5170 case X86::POPCNT32rr:
5171 case X86::POPCNT64rm:
5172 case X86::POPCNT64rr:
5173 return Subtarget.hasPOPCNTFalseDeps();
5174 case X86::LZCNT32rm:
5175 case X86::LZCNT32rr:
5176 case X86::LZCNT64rm:
5177 case X86::LZCNT64rr:
5178 case X86::TZCNT32rm:
5179 case X86::TZCNT32rr:
5180 case X86::TZCNT64rm:
5181 case X86::TZCNT64rr:
5182 return Subtarget.hasLZCNTFalseDeps();
5183 }
5184
5185 return false;
5186}
5187
5188/// Inform the BreakFalseDeps pass how many idle
5189/// instructions we would like before a partial register update.
5190unsigned X86InstrInfo::getPartialRegUpdateClearance(
5191 const MachineInstr &MI, unsigned OpNum,
5192 const TargetRegisterInfo *TRI) const {
5193 if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget))
5194 return 0;
5195
5196 // If MI is marked as reading Reg, the partial register update is wanted.
5197 const MachineOperand &MO = MI.getOperand(0);
5198 Register Reg = MO.getReg();
5199 if (Reg.isVirtual()) {
5200 if (MO.readsReg() || MI.readsVirtualRegister(Reg))
5201 return 0;
5202 } else {
5203 if (MI.readsRegister(Reg, TRI))
5204 return 0;
5205 }
5206
5207 // If any instructions in the clearance range are reading Reg, insert a
5208 // dependency-breaking instruction, which is inexpensive and is likely to
5209 // be hidden in other instructions' cycles.
5210 return PartialRegUpdateClearance;
5211}
5212
5213// Return true for any instruction that copies the high bits of the first source
5214// operand into the unused high bits of the destination operand.
5215// Also returns true for instructions that have two inputs where one may
5216// be undef and we want it to use the same register as the other input.
5217static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
5218 bool ForLoadFold = false) {
5219 // Set the OpNum parameter to the first source operand.
5220 switch (Opcode) {
5221 case X86::MMX_PUNPCKHBWrr:
5222 case X86::MMX_PUNPCKHWDrr:
5223 case X86::MMX_PUNPCKHDQrr:
5224 case X86::MMX_PUNPCKLBWrr:
5225 case X86::MMX_PUNPCKLWDrr:
5226 case X86::MMX_PUNPCKLDQrr:
5227 case X86::MOVHLPSrr:
5228 case X86::PACKSSWBrr:
5229 case X86::PACKUSWBrr:
5230 case X86::PACKSSDWrr:
5231 case X86::PACKUSDWrr:
5232 case X86::PUNPCKHBWrr:
5233 case X86::PUNPCKLBWrr:
5234 case X86::PUNPCKHWDrr:
5235 case X86::PUNPCKLWDrr:
5236 case X86::PUNPCKHDQrr:
5237 case X86::PUNPCKLDQrr:
5238 case X86::PUNPCKHQDQrr:
5239 case X86::PUNPCKLQDQrr:
5240 case X86::SHUFPDrri:
5241 case X86::SHUFPSrri:
5242 // These instructions are sometimes used with an undef first or second
5243 // source. Return true here so BreakFalseDeps will assign this source to the
5244 // same register as the first source to avoid a false dependency.
5245 // Operand 1 of these instructions is tied so they're separate from their
5246 // VEX counterparts.
5247 return OpNum == 2 && !ForLoadFold;
5248
5249 case X86::VMOVLHPSrr:
5250 case X86::VMOVLHPSZrr:
5251 case X86::VPACKSSWBrr:
5252 case X86::VPACKUSWBrr:
5253 case X86::VPACKSSDWrr:
5254 case X86::VPACKUSDWrr:
5255 case X86::VPACKSSWBZ128rr:
5256 case X86::VPACKUSWBZ128rr:
5257 case X86::VPACKSSDWZ128rr:
5258 case X86::VPACKUSDWZ128rr:
5259 case X86::VPERM2F128rr:
5260 case X86::VPERM2I128rr:
5261 case X86::VSHUFF32X4Z256rri:
5262 case X86::VSHUFF32X4Zrri:
5263 case X86::VSHUFF64X2Z256rri:
5264 case X86::VSHUFF64X2Zrri:
5265 case X86::VSHUFI32X4Z256rri:
5266 case X86::VSHUFI32X4Zrri:
5267 case X86::VSHUFI64X2Z256rri:
5268 case X86::VSHUFI64X2Zrri:
5269 case X86::VPUNPCKHBWrr:
5270 case X86::VPUNPCKLBWrr:
5271 case X86::VPUNPCKHBWYrr:
5272 case X86::VPUNPCKLBWYrr:
5273 case X86::VPUNPCKHBWZ128rr:
5274 case X86::VPUNPCKLBWZ128rr:
5275 case X86::VPUNPCKHBWZ256rr:
5276 case X86::VPUNPCKLBWZ256rr:
5277 case X86::VPUNPCKHBWZrr:
5278 case X86::VPUNPCKLBWZrr:
5279 case X86::VPUNPCKHWDrr:
5280 case X86::VPUNPCKLWDrr:
5281 case X86::VPUNPCKHWDYrr:
5282 case X86::VPUNPCKLWDYrr:
5283 case X86::VPUNPCKHWDZ128rr:
5284 case X86::VPUNPCKLWDZ128rr:
5285 case X86::VPUNPCKHWDZ256rr:
5286 case X86::VPUNPCKLWDZ256rr:
5287 case X86::VPUNPCKHWDZrr:
5288 case X86::VPUNPCKLWDZrr:
5289 case X86::VPUNPCKHDQrr:
5290 case X86::VPUNPCKLDQrr:
5291 case X86::VPUNPCKHDQYrr:
5292 case X86::VPUNPCKLDQYrr:
5293 case X86::VPUNPCKHDQZ128rr:
5294 case X86::VPUNPCKLDQZ128rr:
5295 case X86::VPUNPCKHDQZ256rr:
5296 case X86::VPUNPCKLDQZ256rr:
5297 case X86::VPUNPCKHDQZrr:
5298 case X86::VPUNPCKLDQZrr:
5299 case X86::VPUNPCKHQDQrr:
5300 case X86::VPUNPCKLQDQrr:
5301 case X86::VPUNPCKHQDQYrr:
5302 case X86::VPUNPCKLQDQYrr:
5303 case X86::VPUNPCKHQDQZ128rr:
5304 case X86::VPUNPCKLQDQZ128rr:
5305 case X86::VPUNPCKHQDQZ256rr:
5306 case X86::VPUNPCKLQDQZ256rr:
5307 case X86::VPUNPCKHQDQZrr:
5308 case X86::VPUNPCKLQDQZrr:
5309 // These instructions are sometimes used with an undef first or second
5310 // source. Return true here so BreakFalseDeps will assign this source to the
5311 // same register as the first source to avoid a false dependency.
5312 return (OpNum == 1 || OpNum == 2) && !ForLoadFold;
5313
5314 case X86::VCVTSI2SSrr:
5315 case X86::VCVTSI2SSrm:
5316 case X86::VCVTSI2SSrr_Int:
5317 case X86::VCVTSI2SSrm_Int:
5318 case X86::VCVTSI642SSrr:
5319 case X86::VCVTSI642SSrm:
5320 case X86::VCVTSI642SSrr_Int:
5321 case X86::VCVTSI642SSrm_Int:
5322 case X86::VCVTSI2SDrr:
5323 case X86::VCVTSI2SDrm:
5324 case X86::VCVTSI2SDrr_Int:
5325 case X86::VCVTSI2SDrm_Int:
5326 case X86::VCVTSI642SDrr:
5327 case X86::VCVTSI642SDrm:
5328 case X86::VCVTSI642SDrr_Int:
5329 case X86::VCVTSI642SDrm_Int:
5330 // AVX-512
5331 case X86::VCVTSI2SSZrr:
5332 case X86::VCVTSI2SSZrm:
5333 case X86::VCVTSI2SSZrr_Int:
5334 case X86::VCVTSI2SSZrrb_Int:
5335 case X86::VCVTSI2SSZrm_Int:
5336 case X86::VCVTSI642SSZrr:
5337 case X86::VCVTSI642SSZrm:
5338 case X86::VCVTSI642SSZrr_Int:
5339 case X86::VCVTSI642SSZrrb_Int:
5340 case X86::VCVTSI642SSZrm_Int:
5341 case X86::VCVTSI2SDZrr:
5342 case X86::VCVTSI2SDZrm:
5343 case X86::VCVTSI2SDZrr_Int:
5344 case X86::VCVTSI2SDZrm_Int:
5345 case X86::VCVTSI642SDZrr:
5346 case X86::VCVTSI642SDZrm:
5347 case X86::VCVTSI642SDZrr_Int:
5348 case X86::VCVTSI642SDZrrb_Int:
5349 case X86::VCVTSI642SDZrm_Int:
5350 case X86::VCVTUSI2SSZrr:
5351 case X86::VCVTUSI2SSZrm:
5352 case X86::VCVTUSI2SSZrr_Int:
5353 case X86::VCVTUSI2SSZrrb_Int:
5354 case X86::VCVTUSI2SSZrm_Int:
5355 case X86::VCVTUSI642SSZrr:
5356 case X86::VCVTUSI642SSZrm:
5357 case X86::VCVTUSI642SSZrr_Int:
5358 case X86::VCVTUSI642SSZrrb_Int:
5359 case X86::VCVTUSI642SSZrm_Int:
5360 case X86::VCVTUSI2SDZrr:
5361 case X86::VCVTUSI2SDZrm:
5362 case X86::VCVTUSI2SDZrr_Int:
5363 case X86::VCVTUSI2SDZrm_Int:
5364 case X86::VCVTUSI642SDZrr:
5365 case X86::VCVTUSI642SDZrm:
5366 case X86::VCVTUSI642SDZrr_Int:
5367 case X86::VCVTUSI642SDZrrb_Int:
5368 case X86::VCVTUSI642SDZrm_Int:
5369 case X86::VCVTSI2SHZrr:
5370 case X86::VCVTSI2SHZrm:
5371 case X86::VCVTSI2SHZrr_Int:
5372 case X86::VCVTSI2SHZrrb_Int:
5373 case X86::VCVTSI2SHZrm_Int:
5374 case X86::VCVTSI642SHZrr:
5375 case X86::VCVTSI642SHZrm:
5376 case X86::VCVTSI642SHZrr_Int:
5377 case X86::VCVTSI642SHZrrb_Int:
5378 case X86::VCVTSI642SHZrm_Int:
5379 case X86::VCVTUSI2SHZrr:
5380 case X86::VCVTUSI2SHZrm:
5381 case X86::VCVTUSI2SHZrr_Int:
5382 case X86::VCVTUSI2SHZrrb_Int:
5383 case X86::VCVTUSI2SHZrm_Int:
5384 case X86::VCVTUSI642SHZrr:
5385 case X86::VCVTUSI642SHZrm:
5386 case X86::VCVTUSI642SHZrr_Int:
5387 case X86::VCVTUSI642SHZrrb_Int:
5388 case X86::VCVTUSI642SHZrm_Int:
5389 // Load folding won't affect the undef register update since the input is
5390 // a GPR.
5391 return OpNum == 1 && !ForLoadFold;
5392 case X86::VCVTSD2SSrr:
5393 case X86::VCVTSD2SSrm:
5394 case X86::VCVTSD2SSrr_Int:
5395 case X86::VCVTSD2SSrm_Int:
5396 case X86::VCVTSS2SDrr:
5397 case X86::VCVTSS2SDrm:
5398 case X86::VCVTSS2SDrr_Int:
5399 case X86::VCVTSS2SDrm_Int:
5400 case X86::VRCPSSr:
5401 case X86::VRCPSSr_Int:
5402 case X86::VRCPSSm:
5403 case X86::VRCPSSm_Int:
5404 case X86::VROUNDSDr:
5405 case X86::VROUNDSDm:
5406 case X86::VROUNDSDr_Int:
5407 case X86::VROUNDSDm_Int:
5408 case X86::VROUNDSSr:
5409 case X86::VROUNDSSm:
5410 case X86::VROUNDSSr_Int:
5411 case X86::VROUNDSSm_Int:
5412 case X86::VRSQRTSSr:
5413 case X86::VRSQRTSSr_Int:
5414 case X86::VRSQRTSSm:
5415 case X86::VRSQRTSSm_Int:
5416 case X86::VSQRTSSr:
5417 case X86::VSQRTSSr_Int:
5418 case X86::VSQRTSSm:
5419 case X86::VSQRTSSm_Int:
5420 case X86::VSQRTSDr:
5421 case X86::VSQRTSDr_Int:
5422 case X86::VSQRTSDm:
5423 case X86::VSQRTSDm_Int:
5424 // AVX-512
5425 case X86::VCVTSD2SSZrr:
5426 case X86::VCVTSD2SSZrr_Int:
5427 case X86::VCVTSD2SSZrrb_Int:
5428 case X86::VCVTSD2SSZrm:
5429 case X86::VCVTSD2SSZrm_Int:
5430 case X86::VCVTSS2SDZrr:
5431 case X86::VCVTSS2SDZrr_Int:
5432 case X86::VCVTSS2SDZrrb_Int:
5433 case X86::VCVTSS2SDZrm:
5434 case X86::VCVTSS2SDZrm_Int:
5435 case X86::VGETEXPSDZr:
5436 case X86::VGETEXPSDZrb:
5437 case X86::VGETEXPSDZm:
5438 case X86::VGETEXPSSZr:
5439 case X86::VGETEXPSSZrb:
5440 case X86::VGETEXPSSZm:
5441 case X86::VGETMANTSDZrri:
5442 case X86::VGETMANTSDZrrib:
5443 case X86::VGETMANTSDZrmi:
5444 case X86::VGETMANTSSZrri:
5445 case X86::VGETMANTSSZrrib:
5446 case X86::VGETMANTSSZrmi:
5447 case X86::VRNDSCALESDZr:
5448 case X86::VRNDSCALESDZr_Int:
5449 case X86::VRNDSCALESDZrb_Int:
5450 case X86::VRNDSCALESDZm:
5451 case X86::VRNDSCALESDZm_Int:
5452 case X86::VRNDSCALESSZr:
5453 case X86::VRNDSCALESSZr_Int:
5454 case X86::VRNDSCALESSZrb_Int:
5455 case X86::VRNDSCALESSZm:
5456 case X86::VRNDSCALESSZm_Int:
5457 case X86::VRCP14SDZrr:
5458 case X86::VRCP14SDZrm:
5459 case X86::VRCP14SSZrr:
5460 case X86::VRCP14SSZrm:
5461 case X86::VRCPSHZrr:
5462 case X86::VRCPSHZrm:
5463 case X86::VRSQRTSHZrr:
5464 case X86::VRSQRTSHZrm:
5465 case X86::VREDUCESHZrmi:
5466 case X86::VREDUCESHZrri:
5467 case X86::VREDUCESHZrrib:
5468 case X86::VGETEXPSHZr:
5469 case X86::VGETEXPSHZrb:
5470 case X86::VGETEXPSHZm:
5471 case X86::VGETMANTSHZrri:
5472 case X86::VGETMANTSHZrrib:
5473 case X86::VGETMANTSHZrmi:
5474 case X86::VRNDSCALESHZr:
5475 case X86::VRNDSCALESHZr_Int:
5476 case X86::VRNDSCALESHZrb_Int:
5477 case X86::VRNDSCALESHZm:
5478 case X86::VRNDSCALESHZm_Int:
5479 case X86::VSQRTSHZr:
5480 case X86::VSQRTSHZr_Int:
5481 case X86::VSQRTSHZrb_Int:
5482 case X86::VSQRTSHZm:
5483 case X86::VSQRTSHZm_Int:
5484 case X86::VRCP28SDZr:
5485 case X86::VRCP28SDZrb:
5486 case X86::VRCP28SDZm:
5487 case X86::VRCP28SSZr:
5488 case X86::VRCP28SSZrb:
5489 case X86::VRCP28SSZm:
5490 case X86::VREDUCESSZrmi:
5491 case X86::VREDUCESSZrri:
5492 case X86::VREDUCESSZrrib:
5493 case X86::VRSQRT14SDZrr:
5494 case X86::VRSQRT14SDZrm:
5495 case X86::VRSQRT14SSZrr:
5496 case X86::VRSQRT14SSZrm:
5497 case X86::VRSQRT28SDZr:
5498 case X86::VRSQRT28SDZrb:
5499 case X86::VRSQRT28SDZm:
5500 case X86::VRSQRT28SSZr:
5501 case X86::VRSQRT28SSZrb:
5502 case X86::VRSQRT28SSZm:
5503 case X86::VSQRTSSZr:
5504 case X86::VSQRTSSZr_Int:
5505 case X86::VSQRTSSZrb_Int:
5506 case X86::VSQRTSSZm:
5507 case X86::VSQRTSSZm_Int:
5508 case X86::VSQRTSDZr:
5509 case X86::VSQRTSDZr_Int:
5510 case X86::VSQRTSDZrb_Int:
5511 case X86::VSQRTSDZm:
5512 case X86::VSQRTSDZm_Int:
5513 case X86::VCVTSD2SHZrr:
5514 case X86::VCVTSD2SHZrr_Int:
5515 case X86::VCVTSD2SHZrrb_Int:
5516 case X86::VCVTSD2SHZrm:
5517 case X86::VCVTSD2SHZrm_Int:
5518 case X86::VCVTSS2SHZrr:
5519 case X86::VCVTSS2SHZrr_Int:
5520 case X86::VCVTSS2SHZrrb_Int:
5521 case X86::VCVTSS2SHZrm:
5522 case X86::VCVTSS2SHZrm_Int:
5523 case X86::VCVTSH2SDZrr:
5524 case X86::VCVTSH2SDZrr_Int:
5525 case X86::VCVTSH2SDZrrb_Int:
5526 case X86::VCVTSH2SDZrm:
5527 case X86::VCVTSH2SDZrm_Int:
5528 case X86::VCVTSH2SSZrr:
5529 case X86::VCVTSH2SSZrr_Int:
5530 case X86::VCVTSH2SSZrrb_Int:
5531 case X86::VCVTSH2SSZrm:
5532 case X86::VCVTSH2SSZrm_Int:
5533 return OpNum == 1;
5534 case X86::VMOVSSZrrk:
5535 case X86::VMOVSDZrrk:
5536 return OpNum == 3 && !ForLoadFold;
5537 case X86::VMOVSSZrrkz:
5538 case X86::VMOVSDZrrkz:
5539 return OpNum == 2 && !ForLoadFold;
5540 }
5541
5542 return false;
5543}
5544
5545/// Inform the BreakFalseDeps pass how many idle instructions we would like
5546/// before certain undef register reads.
5547///
5548/// This catches the VCVTSI2SD family of instructions:
5549///
5550/// vcvtsi2sdq %rax, undef %xmm0, %xmm14
5551///
5552/// We should be careful *not* to catch VXOR idioms, which are presumably
5553/// handled specially in the pipeline:
5554///
5555/// vxorps undef %xmm1, undef %xmm1, %xmm1
5556///
5557/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
5558/// high bits that are passed-through are not live.
5559unsigned
5560X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
5561 const TargetRegisterInfo *TRI) const {
5562 const MachineOperand &MO = MI.getOperand(OpNum);
5563 if (Register::isPhysicalRegister(MO.getReg()) &&
5564 hasUndefRegUpdate(MI.getOpcode(), OpNum))
5565 return UndefRegClearance;
5566
5567 return 0;
5568}
5569
5570void X86InstrInfo::breakPartialRegDependency(
5571 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5572 Register Reg = MI.getOperand(OpNum).getReg();
5573 // If MI kills this register, the false dependence is already broken.
5574 if (MI.killsRegister(Reg, TRI))
5575 return;
5576
5577 if (X86::VR128RegClass.contains(Reg)) {
5578 // These instructions are all floating point domain, so xorps is the best
5579 // choice.
5580 unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
5581 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
5582 .addReg(Reg, RegState::Undef)
5583 .addReg(Reg, RegState::Undef);
5584 MI.addRegisterKilled(Reg, TRI, true);
5585 } else if (X86::VR256RegClass.contains(Reg)) {
5586 // Use vxorps to clear the full ymm register.
5587 // It wants to read and write the xmm sub-register.
5588 Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
5589 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
5590 .addReg(XReg, RegState::Undef)
5591 .addReg(XReg, RegState::Undef)
5592 .addReg(Reg, RegState::ImplicitDefine);
5593 MI.addRegisterKilled(Reg, TRI, true);
5594 } else if (X86::GR64RegClass.contains(Reg)) {
5595     // Use XOR32rr because it has a shorter encoding and zeroes the upper
5596     // 32 bits as well.
5597 Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
5598 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
5599 .addReg(XReg, RegState::Undef)
5600 .addReg(XReg, RegState::Undef)
5601 .addReg(Reg, RegState::ImplicitDefine);
5602 MI.addRegisterKilled(Reg, TRI, true);
5603 } else if (X86::GR32RegClass.contains(Reg)) {
5604 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
5605 .addReg(Reg, RegState::Undef)
5606 .addReg(Reg, RegState::Undef);
5607 MI.addRegisterKilled(Reg, TRI, true);
5608 }
5609}
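A minimal before/after sketch of this rewrite for an XMM operand (illustrative only, not part of X86InstrInfo.cpp; the register choice is arbitrary, AT&T syntax):

      // Before: the convert only writes the low lane, so it carries a false
      // dependence on whatever last wrote %xmm14.
      //   vcvtsi2sdq %rax, %xmm14, %xmm14
      // After breakPartialRegDependency inserts the zeroing idiom built above:
      //   vxorps %xmm14, %xmm14, %xmm14
      //   vcvtsi2sdq %rax, %xmm14, %xmm14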
5610
5611static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
5612 int PtrOffset = 0) {
5613 unsigned NumAddrOps = MOs.size();
5614
5615 if (NumAddrOps < 4) {
5616     // FrameIndex only - add an immediate offset (whether it's zero or not).
5617 for (unsigned i = 0; i != NumAddrOps; ++i)
5618 MIB.add(MOs[i]);
5619 addOffset(MIB, PtrOffset);
5620 } else {
5621 // General Memory Addressing - we need to add any offset to an existing
5622 // offset.
5623     assert(MOs.size() == 5 && "Unexpected memory operand list length");
5624 for (unsigned i = 0; i != NumAddrOps; ++i) {
5625 const MachineOperand &MO = MOs[i];
5626 if (i == 3 && PtrOffset != 0) {
5627 MIB.addDisp(MO, PtrOffset);
5628 } else {
5629 MIB.add(MO);
5630 }
5631 }
5632 }
5633}
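The five-operand x86 memory reference that addOperands copies is base, scale, index, displacement, segment, and a nonzero PtrOffset only ever lands in the displacement (operand 3 above). A standalone sketch with a hypothetical plain-data type, not LLVM's MachineOperand:

      #include <cstdint>

      struct MemRef {
        unsigned BaseReg;  // operand 0
        int64_t  Scale;    // operand 1
        unsigned IndexReg; // operand 2
        int64_t  Disp;     // operand 3 -- the only field a fold offset touches
        unsigned SegReg;   // operand 4
      };

      // Mirrors the "i == 3 && PtrOffset != 0" branch above.
      MemRef withPtrOffset(MemRef M, int PtrOffset) {
        M.Disp += PtrOffset;
        return M;
      }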
5634
5635static void updateOperandRegConstraints(MachineFunction &MF,
5636 MachineInstr &NewMI,
5637 const TargetInstrInfo &TII) {
5638 MachineRegisterInfo &MRI = MF.getRegInfo();
5639 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
5640
5641 for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
5642 MachineOperand &MO = NewMI.getOperand(Idx);
5643 // We only need to update constraints on virtual register operands.
5644 if (!MO.isReg())
5645 continue;
5646 Register Reg = MO.getReg();
5647 if (!Reg.isVirtual())
5648 continue;
5649
5650 auto *NewRC = MRI.constrainRegClass(
5651 Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
5652 if (!NewRC) {
5653       LLVM_DEBUG(
5654           dbgs() << "WARNING: Unable to update register constraint for operand "
5655                  << Idx << " of instruction:\n";
5656           NewMI.dump(); dbgs() << "\n");
5657 }
5658 }
5659}
5660
5661static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
5662 ArrayRef<MachineOperand> MOs,
5663 MachineBasicBlock::iterator InsertPt,
5664 MachineInstr &MI,
5665 const TargetInstrInfo &TII) {
5666 // Create the base instruction with the memory operand as the first part.
5667 // Omit the implicit operands, something BuildMI can't do.
5668 MachineInstr *NewMI =
5669 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
5670 MachineInstrBuilder MIB(MF, NewMI);
5671 addOperands(MIB, MOs);
5672
5673 // Loop over the rest of the ri operands, converting them over.
5674 unsigned NumOps = MI.getDesc().getNumOperands() - 2;
5675 for (unsigned i = 0; i != NumOps; ++i) {
5676 MachineOperand &MO = MI.getOperand(i + 2);
5677 MIB.add(MO);
5678 }
5679 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), NumOps + 2))
5680 MIB.add(MO);
5681
5682 updateOperandRegConstraints(MF, *NewMI, TII);
5683
5684 MachineBasicBlock *MBB = InsertPt->getParent();
5685 MBB->insert(InsertPt, NewMI);
5686
5687 return MIB;
5688}
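Schematically (an illustrative sketch in MIR-like syntax, not from the file; the opcode mapping and register names are assumptions), a two-address fold replaces the tied def/use pair with the memory reference and then appends the remaining sources:

      //   %r = ADD32rr %r(tied), %s        ; %r is being spilled to %stack.0
      // becomes, roughly,
      //   ADD32mr %stack.0, 1, $noreg, 0, $noreg, %s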
5689
5690static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
5691 unsigned OpNo, ArrayRef<MachineOperand> MOs,
5692 MachineBasicBlock::iterator InsertPt,
5693 MachineInstr &MI, const TargetInstrInfo &TII,
5694 int PtrOffset = 0) {
5695 // Omit the implicit operands, something BuildMI can't do.
5696 MachineInstr *NewMI =
5697 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
5698 MachineInstrBuilder MIB(MF, NewMI);
5699
5700 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
5701 MachineOperand &MO = MI.getOperand(i);
5702 if (i == OpNo) {
5703       assert(MO.isReg() && "Expected to fold into reg operand!");
5704 addOperands(MIB, MOs, PtrOffset);
5705 } else {
5706 MIB.add(MO);
5707 }
5708 }
5709
5710 updateOperandRegConstraints(MF, *NewMI, TII);
5711
5712 // Copy the NoFPExcept flag from the instruction we're fusing.
5713 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
5714 NewMI->setFlag(MachineInstr::MIFlag::NoFPExcept);
5715
5716 MachineBasicBlock *MBB = InsertPt->getParent();
5717 MBB->insert(InsertPt, NewMI);
5718
5719 return MIB;
5720}
5721
5722static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
5723 ArrayRef<MachineOperand> MOs,
5724 MachineBasicBlock::iterator InsertPt,
5725 MachineInstr &MI) {
5726 MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
5727 MI.getDebugLoc(), TII.get(Opcode));
5728 addOperands(MIB, MOs);
5729 return MIB.addImm(0);
5730}
5731
5732MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
5733 MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
5734 ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
5735 unsigned Size, Align Alignment) const {
5736 switch (MI.getOpcode()) {
5737 case X86::INSERTPSrr:
5738 case X86::VINSERTPSrr:
5739 case X86::VINSERTPSZrr:
5740     // Attempt to convert the load of the inserted vector into a folded
5741     // load of a single float.
5742 if (OpNum == 2) {
5743 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
5744 unsigned ZMask = Imm & 15;
5745 unsigned DstIdx = (Imm >> 4) & 3;
5746 unsigned SrcIdx = (Imm >> 6) & 3;
5747
5748 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5749 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
5750 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
5751 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(4)) {
5752 int PtrOffset = SrcIdx * 4;
5753 unsigned NewImm = (DstIdx << 4) | ZMask;
5754 unsigned NewOpCode =
5755 (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm :
5756 (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm :
5757 X86::INSERTPSrm;
5758 MachineInstr *NewMI =
5759 FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
5760 NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
5761 return NewMI;
5762 }
5763 }
5764 break;
5765 case X86::MOVHLPSrr:
5766 case X86::VMOVHLPSrr:
5767 case X86::VMOVHLPSZrr:
5768 // Move the upper 64-bits of the second operand to the lower 64-bits.
5769 // To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
5770     // TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
5771 if (OpNum == 2) {
5772 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5773 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
5774 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
5775 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
5776 unsigned NewOpCode =
5777 (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
5778 (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
5779 X86::MOVLPSrm;
5780 MachineInstr *NewMI =
5781 FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
5782 return NewMI;
5783 }
5784 }
5785 break;
5786 case X86::UNPCKLPDrr:
5787 // If we won't be able to fold this to the memory form of UNPCKL, use
5788 // MOVHPD instead. Done as custom because we can't have this in the load
5789 // table twice.
5790 if (OpNum == 2) {
5791 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5792 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
5793 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
5794 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) {
5795 MachineInstr *NewMI =
5796 FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
5797 return NewMI;
5798 }
5799 }
5800 break;
5801 }
5802
5803 return nullptr;
5804}
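The INSERTPS case above is driven entirely by the immediate's bit layout: bits [7:6] pick the source element, bits [5:4] the destination lane, bits [3:0] the zero mask. Once the element is loaded straight from memory at SrcIdx*4 bytes, only the destination lane and zero mask remain. A self-contained sketch of that decoding (hypothetical helper names, not part of this file):

      #include <cassert>
      #include <cstdint>

      struct InsertPSFold {
        int PtrOffset;   // extra byte offset applied to the folded load address
        uint8_t NewImm;  // immediate used by the (V)INSERTPSrm form
      };

      InsertPSFold foldInsertPSImm(uint8_t Imm) {
        unsigned ZMask = Imm & 15;
        unsigned DstIdx = (Imm >> 4) & 3;
        unsigned SrcIdx = (Imm >> 6) & 3;
        return {static_cast<int>(SrcIdx * 4),
                static_cast<uint8_t>((DstIdx << 4) | ZMask)};
      }

      int main() {
        // Imm 0x90: SrcIdx = 2, DstIdx = 1, ZMask = 0. The fold reads 4 bytes
        // at offset 8 and keeps only the destination lane in the new immediate.
        InsertPSFold F = foldInsertPSImm(0x90);
        assert(F.PtrOffset == 8 && F.NewImm == 0x10);
        return 0;
      }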
5805
5806static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
5807 MachineInstr &MI) {
5808 if (!hasUndefRegUpdate(MI.getOpcode(), 1, /*ForLoadFold*/true) ||
5809 !MI.getOperand(1).isReg())
5810 return false;
5811
5812   // There are two cases we need to handle, depending on where in the
5813   // pipeline the folding attempt is being made:
5814 // -Register has the undef flag set.
5815 // -Register is produced by the IMPLICIT_DEF instruction.
5816
5817 if (MI.getOperand(1).isUndef())
5818 return true;
5819
5820 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5821 MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
5822 return VRegDef && VRegDef->isImplicitDef();
5823}
5824
5825MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
5826 MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
5827 ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
5828 unsigned Size, Align Alignment, bool AllowCommute) const {
5829 bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
5830 bool isTwoAddrFold = false;
5831
5832 // For CPUs that favor the register form of a call or push,
5833 // do not fold loads into calls or pushes, unless optimizing for size
5834 // aggressively.
5835 if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() &&
5836 (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r ||
5837 MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r ||
5838 MI.getOpcode() == X86::PUSH64r))
5839 return nullptr;
5840
5841 // Avoid partial and undef register update stalls unless optimizing for size.
5842 if (!MF.getFunction().hasOptSize() &&
5843 (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
5844 shouldPreventUndefRegUpdateMemFold(MF, MI)))
5845 return nullptr;
5846
5847 unsigned NumOps = MI.getDesc().getNumOperands();
5848 bool isTwoAddr =
5849 NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
5850
5851 // FIXME: AsmPrinter doesn't know how to handle
5852 // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
5853 if (MI.getOpcode() == X86::ADD32ri &&
5854 MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
5855 return nullptr;
5856
5857 // GOTTPOFF relocation loads can only be folded into add instructions.
5858 // FIXME: Need to exclude other relocations that only support specific
5859 // instructions.
5860 if (MOs.size() == X86::AddrNumOperands &&
5861 MOs[X86::AddrDisp].getTargetFlags() == X86II::MO_GOTTPOFF &&
5862 MI.getOpcode() != X86::ADD64rr)
5863 return nullptr;
5864
5865 MachineInstr *NewMI = nullptr;
5866
5867 // Attempt to fold any custom cases we have.
5868 if (MachineInstr *CustomMI = foldMemoryOperandCustom(
5869 MF, MI, OpNum, MOs, InsertPt, Size, Alignment))
5870 return CustomMI;
5871
5872 const X86MemoryFoldTableEntry *I = nullptr;
5873
5874   // Folding a memory location into the two-address part of a two-address
5875   // instruction is different from folding it anywhere else: it requires
5876   // replacing the *two* registers with the memory location.
5877 if (isTwoAddr && NumOps >= 2 && OpNum < 2 && MI.getOperand(0).isReg() &&
5878 MI.getOperand(1).isReg() &&
5879 MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
5880 I = lookupTwoAddrFoldTable(MI.getOpcode());
5881 isTwoAddrFold = true;
5882 } else {
5883 if (OpNum == 0) {
5884 if (MI.getOpcode() == X86::MOV32r0) {
5885 NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
5886 if (NewMI)
5887 return NewMI;
5888 }
5889 }
5890
5891 I = lookupFoldTable(MI.getOpcode(), OpNum);
5892 }
5893
5894 if (I != nullptr) {
5895 unsigned Opcode = I->DstOp;
5896 bool FoldedLoad =
5897 isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_LOAD) || OpNum > 0;
5898 bool FoldedStore =
5899 isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_STORE);
5900 MaybeAlign MinAlign =
5901 decodeMaybeAlign((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT);
5902 if (MinAlign && Alignment < *MinAlign)
5903 return nullptr;
5904 bool NarrowToMOV32rm = false;
5905 if (Size) {
5906 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5907 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum,
5908 &RI, MF);
5909 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
5910 // Check if it's safe to fold the load. If the size of the object is
5911 // narrower than the load width, then it's not.
5912 // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
5913 if (FoldedLoad && Size < RCSize) {
5914         // If this is a 64-bit load but the spill slot is only 32 bits, we can
5915         // do a 32-bit load, which is implicitly zero-extended. This is likely
5916         // due to live interval analysis rematerializing a load from a stack slot.
5917 if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
5918 return nullptr;
5919 if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
5920 return nullptr;
5921 Opcode = X86::MOV32rm;
5922 NarrowToMOV32rm = true;
5923 }
5924 // For stores, make sure the size of the object is equal to the size of
5925 // the store. If the object is larger, the extra bits would be garbage. If
5926     // the object is smaller, we might overwrite another object or fault.
5927 if (FoldedStore && Size != RCSize)
5928 return nullptr;
5929 }
5930
5931 if (isTwoAddrFold)
5932 NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this);
5933 else
5934 NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
5935
5936 if (NarrowToMOV32rm) {
5937       // This is the special case where we use a MOV32rm to load a 32-bit
5938       // value and zero-extend the top bits. Change the destination register
5939       // to a 32-bit one.
5940 Register DstReg = NewMI->getOperand(0).getReg();
5941 if (DstReg.isPhysical())
5942 NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
5943 else
5944 NewMI->getOperand(0).setSubReg(X86::sub_32bit);
5945 }
5946 return NewMI;
5947 }
5948
5949 // If the instruction and target operand are commutable, commute the
5950 // instruction and try again.
5951 if (AllowCommute) {
5952 unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
5953 if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
5954 bool HasDef = MI.getDesc().getNumDefs();
5955 Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
5956 Register Reg1 = MI.getOperand(CommuteOpIdx1).getReg();
5957 Register Reg2 = MI.getOperand(CommuteOpIdx2).getReg();
5958 bool Tied1 =
5959 0 == MI.getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
5960 bool Tied2 =
5961 0 == MI.getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
5962
5963 // If either of the commutable operands are tied to the destination
5964 // then we can not commute + fold.
5965 if ((HasDef && Reg0 == Reg1 && Tied1) ||
5966 (HasDef && Reg0 == Reg2 && Tied2))
5967 return nullptr;
5968
5969 MachineInstr *CommutedMI =
5970 commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
5971 if (!CommutedMI) {
5972 // Unable to commute.
5973 return nullptr;
5974 }
5975 if (CommutedMI != &MI) {
5976 // New instruction. We can't fold from this.
5977 CommutedMI->eraseFromParent();
5978 return nullptr;
5979 }
5980
5981 // Attempt to fold with the commuted version of the instruction.
5982 NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, Size,
5983 Alignment, /*AllowCommute=*/false);
5984 if (NewMI)
5985 return NewMI;
5986
5987 // Folding failed again - undo the commute before returning.
5988 MachineInstr *UncommutedMI =
5989 commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
5990 if (!UncommutedMI) {
5991 // Unable to commute.
5992 return nullptr;
5993 }
5994 if (UncommutedMI != &MI) {
5995 // New instruction. It doesn't need to be kept.
5996 UncommutedMI->eraseFromParent();
5997 return nullptr;
5998 }
5999
6000 // Return here to prevent duplicate fuse failure report.
6001 return nullptr;
6002 }
6003 }
6004
6005 // No fusion
6006 if (PrintFailedFusing && !MI.isCopy())
6007 dbgs() << "We failed to fuse operand " << OpNum << " in " << MI;
6008 return nullptr;
6009}
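An illustration of the NarrowToMOV32rm path above (illustrative sketch, not from the file; registers and slot name are arbitrary): when the fold would produce a MOV64rm from a 4-byte spill slot, it is narrowed to a MOV32rm whose implicit zero-extension preserves the 64-bit value.

      //   %rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg   ; slot is 4 bytes
      // becomes
      //   %eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg   ; upper 32 bits zeroed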
6010
6011MachineInstr *
6012X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
6013 ArrayRef<unsigned> Ops,
6014 MachineBasicBlock::iterator InsertPt,
6015 int FrameIndex, LiveIntervals *LIS,
6016 VirtRegMap *VRM) const {
6017 // Check switch flag
6018 if (NoFusing)
6019 return nullptr;
6020
6021 // Avoid partial and undef register update stalls unless optimizing for size.
6022 if (!MF.getFunction().hasOptSize() &&
6023 (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
6024 shouldPreventUndefRegUpdateMemFold(MF, MI)))
6025 return nullptr;
6026
6027 // Don't fold subreg spills, or reloads that use a high subreg.
6028 for (auto Op : Ops) {
6029 MachineOperand &MO = MI.getOperand(Op);
6030 auto SubReg = MO.getSubReg();
6031 if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
6032 return nullptr;
6033 }
6034
6035 const MachineFrameInfo &MFI = MF.getFrameInfo();
6036 unsigned Size = MFI.getObjectSize(FrameIndex);
6037 Align Alignment = MFI.getObjectAlign(FrameIndex);
6038 // If the function stack isn't realigned we don't want to fold instructions
6039 // that need increased alignment.
6040 if (!RI.hasStackRealignment(MF))
6041 Alignment =
6042 std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign());
6043 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
6044 unsigned NewOpc = 0;
6045 unsigned RCSize = 0;
6046 switch (MI.getOpcode()) {
6047 default: return nullptr;
6048 case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
6049 case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
6050 case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
6051 case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
6052 }
6053 // Check if it's safe to fold the load. If the size of the object is
6054 // narrower than the load width, then it's not.
6055 if (Size < RCSize)
6056 return nullptr;
6057 // Change to CMPXXri r, 0 first.
6058 MI.setDesc(get(NewOpc));
6059 MI.getOperand(1).ChangeToImmediate(0);
6060 } else if (Ops.size() != 1)
6061 return nullptr;
6062
6063 return foldMemoryOperandImpl(MF, MI, Ops[0],
6064 MachineOperand::CreateFI(FrameIndex), InsertPt,
6065 Size, Alignment, /*AllowCommute=*/true);
6066}
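When both operands of a register TEST refer to the value in the frame index, the code above first rewrites the TEST as a compare against zero and then folds the remaining register operand. Roughly, in AT&T syntax (an illustrative sketch; the slot address is symbolic):

      //   testl %eax, %eax          ; %eax would be reloaded from the slot
      // is rewritten to
      //   cmpl  $0, %eax
      // and the reload is then folded, giving
      //   cmpl  $0, <slot>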
6067
6068/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
6069/// because the latter uses contents that wouldn't be defined in the folded
6070/// version. For instance, this transformation isn't legal:
6071/// movss (%rdi), %xmm0
6072/// addps %xmm0, %xmm0
6073/// ->
6074/// addps (%rdi), %xmm0
6075///
6076/// But this one is:
6077/// movss (%rdi), %xmm0
6078/// addss %xmm0, %xmm0
6079/// ->
6080/// addss (%rdi), %xmm0
6081///
6082static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
6083 const MachineInstr &UserMI,
6084 const MachineFunction &MF) {
6085 unsigned Opc = LoadMI.getOpcode();
6086 unsigned UserOpc = UserMI.getOpcode();
6087 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
6088 const TargetRegisterClass *RC =
6089 MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
6090 unsigned RegSize = TRI.getRegSizeInBits(*RC);
6091
6092 if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
6093 Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
6094 Opc == X86::VMOVSSZrm_alt) &&
6095 RegSize > 32) {
6096     // These instructions only load 32 bits; we can't fold them if the
6097     // destination register is wider than 32 bits (4 bytes) and the user
6098     // instruction isn't a scalar (SS) operation.
6099 switch (UserOpc) {
6100 case X86::CVTSS2SDrr_Int:
6101 case X86::VCVTSS2SDrr_Int:
6102 case X86::VCVTSS2SDZrr_Int:
6103 case X86::VCVTSS2SDZrr_Intk:
6104 case X86::VCVTSS2SDZrr_Intkz:
6105 case X86::CVTSS2SIrr_Int: case X86::CVTSS2SI64rr_Int:
6106 case X86::VCVTSS2SIrr_Int: case X86::VCVTSS2SI64rr_Int:
6107 case X86::VCVTSS2SIZrr_Int: case X86::VCVTSS2SI64Zrr_Int:
6108 case X86::CVTTSS2SIrr_Int: case X86::CVTTSS2SI64rr_Int:
6109 case X86::VCVTTSS2SIrr_Int: case X86::VCVTTSS2SI64rr_Int:
6110 case X86::VCVTTSS2SIZrr_Int: case X86::VCVTTSS2SI64Zrr_Int:
6111 case X86::VCVTSS2USIZrr_Int: case X86::VCVTSS2USI64Zrr_Int:
6112 case X86::VCVTTSS2USIZrr_Int: case X86::VCVTTSS2USI64Zrr_Int:
6113 case X86::RCPSSr_Int: case X86::VRCPSSr_Int:
6114 case X86::RSQRTSSr_Int: case X86::VRSQRTSSr_Int:
6115 case X86::ROUNDSSr_Int: case X86::VROUNDSSr_Int:
6116 case X86::COMISSrr_Int: case X86::VCOMISSrr_Int: case X86::VCOMISSZrr_Int:
6117 case X86::UCOMISSrr_Int:case X86::VUCOMISSrr_Int:case X86::VUCOMISSZrr_Int:
6118 case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
6119 case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int:
6120 case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
6121 case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
6122 case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
6123 case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
6124 case X86::SQRTSSr_Int: case X86::VSQRTSSr_Int: case X86::VSQRTSSZr_Int:
6125 case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
6126 case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
6127 case X86::VCMPSSZrr_Intk:
6128 case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
6129 case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
6130 case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
6131 case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
6132 case X86::VSQRTSSZr_Intk: case X86::VSQRTSSZr_Intkz:
6133 case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
6134 case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
6135 case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
6136 case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
6137 case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int:
6138 case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int:
6139 case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int:
6140 case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int:
6141 case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int:
6142 case X86::VFMADD132SSZr_Int: case X86::VFNMADD132SSZr_Int:
6143 case X86::VFMADD213SSZr_Int: case X86::VFNMADD213SSZr_Int:
6144 case X86::VFMADD231SSZr_Int: case X86::VFNMADD231SSZr_Int:
6145 case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
6146 case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
6147 case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
6148 case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
6149 case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
6150 case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
6151 case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
6152 case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
6153 case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
6154 case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
6155 case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
6156 case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
6157 case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
6158 case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
6159 case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
6160 case X86::VFIXUPIMMSSZrri:
6161 case X86::VFIXUPIMMSSZrrik:
6162 case X86::VFIXUPIMMSSZrrikz:
6163 case X86::VFPCLASSSSZrr:
6164 case X86::VFPCLASSSSZrrk:
6165 case X86::VGETEXPSSZr:
6166 case X86::VGETEXPSSZrk:
6167 case X86::VGETEXPSSZrkz:
6168 case X86::VGETMANTSSZrri:
6169 case X86::VGETMANTSSZrrik:
6170 case X86::VGETMANTSSZrrikz:
6171 case X86::VRANGESSZrri:
6172 case X86::VRANGESSZrrik:
6173 case X86::VRANGESSZrrikz:
6174 case X86::VRCP14SSZrr:
6175 case X86::VRCP14SSZrrk:
6176 case X86::VRCP14SSZrrkz:
6177 case X86::VRCP28SSZr:
6178 case X86::VRCP28SSZrk:
6179 case X86::VRCP28SSZrkz:
6180 case X86::VREDUCESSZrri:
6181 case X86::VREDUCESSZrrik:
6182 case X86::VREDUCESSZrrikz:
6183 case X86::VRNDSCALESSZr_Int:
6184 case X86::VRNDSCALESSZr_Intk:
6185 case X86::VRNDSCALESSZr_Intkz:
6186 case X86::VRSQRT14SSZrr:
6187 case X86::VRSQRT14SSZrrk:
6188 case X86::VRSQRT14SSZrrkz:
6189 case X86::VRSQRT28SSZr:
6190 case X86::VRSQRT28SSZrk:
6191 case X86::VRSQRT28SSZrkz:
6192 case X86::VSCALEFSSZrr:
6193 case X86::VSCALEFSSZrrk:
6194 case X86::VSCALEFSSZrrkz:
6195 return false;
6196 default:
6197 return true;
6198 }
6199 }
6200
6201 if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
6202 Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
6203 Opc == X86::VMOVSDZrm_alt) &&
6204 RegSize > 64) {
6205     // These instructions only load 64 bits; we can't fold them if the
6206     // destination register is wider than 64 bits (8 bytes) and the user
6207     // instruction isn't a scalar (SD) operation.
6208 switch (UserOpc) {
6209 case X86::CVTSD2SSrr_Int:
6210 case X86::VCVTSD2SSrr_Int:
6211 case X86::VCVTSD2SSZrr_Int:
6212 case X86::VCVTSD2SSZrr_Intk:
6213 case X86::VCVTSD2SSZrr_Intkz:
6214 case X86::CVTSD2SIrr_Int: case X86::CVTSD2SI64rr_Int:
6215 case X86::VCVTSD2SIrr_Int: case X86::VCVTSD2SI64rr_Int:
6216 case X86::VCVTSD2SIZrr_Int: case X86::VCVTSD2SI64Zrr_Int:
6217 case X86::CVTTSD2SIrr_Int: case X86::CVTTSD2SI64rr_Int:
6218 case X86::VCVTTSD2SIrr_Int: case X86::VCVTTSD2SI64rr_Int:
6219 case X86::VCVTTSD2SIZrr_Int: case X86::VCVTTSD2SI64Zrr_Int:
6220 case X86::VCVTSD2USIZrr_Int: case X86::VCVTSD2USI64Zrr_Int:
6221 case X86::VCVTTSD2USIZrr_Int: case X86::VCVTTSD2USI64Zrr_Int:
6222 case X86::ROUNDSDr_Int: case X86::VROUNDSDr_Int:
6223 case X86::COMISDrr_Int: case X86::VCOMISDrr_Int: case X86::VCOMISDZrr_Int:
6224 case X86::UCOMISDrr_Int:case X86::VUCOMISDrr_Int:case X86::VUCOMISDZrr_Int:
6225 case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
6226 case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int:
6227 case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
6228 case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
6229 case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
6230 case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
6231 case X86::SQRTSDr_Int: case X86::VSQRTSDr_Int: case X86::VSQRTSDZr_Int:
6232 case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
6233 case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
6234 case X86::VCMPSDZrr_Intk:
6235 case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
6236 case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
6237 case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
6238 case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
6239 case X86::VSQRTSDZr_Intk: case X86::VSQRTSDZr_Intkz:
6240 case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
6241 case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
6242 case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
6243 case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
6244 case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int:
6245 case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int:
6246 case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int:
6247 case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int:
6248 case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int:
6249 case X86::VFMADD132SDZr_Int: case X86::VFNMADD132SDZr_Int:
6250 case X86::VFMADD213SDZr_Int: case X86::VFNMADD213SDZr_Int:
6251 case X86::VFMADD231SDZr_Int: case X86::VFNMADD231SDZr_Int:
6252 case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
6253 case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
6254 case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
6255 case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
6256 case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
6257 case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
6258 case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
6259 case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
6260 case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
6261 case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
6262 case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
6263 case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
6264 case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
6265 case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
6266 case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
6267 case X86::VFIXUPIMMSDZrri:
6268 case X86::VFIXUPIMMSDZrrik:
6269 case X86::VFIXUPIMMSDZrrikz:
6270 case X86::VFPCLASSSDZrr:
6271 case X86::VFPCLASSSDZrrk:
6272 case X86::VGETEXPSDZr:
6273 case X86::VGETEXPSDZrk:
6274 case X86::VGETEXPSDZrkz:
6275 case X86::VGETMANTSDZrri:
6276 case X86::VGETMANTSDZrrik:
6277 case X86::VGETMANTSDZrrikz:
6278 case X86::VRANGESDZrri:
6279 case X86::VRANGESDZrrik:
6280 case X86::VRANGESDZrrikz:
6281 case X86::VRCP14SDZrr:
6282 case X86::VRCP14SDZrrk:
6283 case X86::VRCP14SDZrrkz:
6284 case X86::VRCP28SDZr:
6285 case X86::VRCP28SDZrk:
6286 case X86::VRCP28SDZrkz:
6287 case X86::VREDUCESDZrri:
6288 case X86::VREDUCESDZrrik:
6289 case X86::VREDUCESDZrrikz:
6290 case X86::VRNDSCALESDZr_Int:
6291 case X86::VRNDSCALESDZr_Intk:
6292 case X86::VRNDSCALESDZr_Intkz:
6293 case X86::VRSQRT14SDZrr:
6294 case X86::VRSQRT14SDZrrk:
6295 case X86::VRSQRT14SDZrrkz:
6296 case X86::VRSQRT28SDZr:
6297 case X86::VRSQRT28SDZrk:
6298 case X86::VRSQRT28SDZrkz:
6299 case X86::VSCALEFSDZrr:
6300 case X86::VSCALEFSDZrrk:
6301 case X86::VSCALEFSDZrrkz:
6302 return false;
6303 default:
6304 return true;
6305 }
6306 }
6307
6308 if ((Opc == X86::VMOVSHZrm || Opc == X86::VMOVSHZrm_alt) && RegSize > 16) {
6309     // These instructions only load 16 bits; we can't fold them if the
6310     // destination register is wider than 16 bits (2 bytes) and the user
6311     // instruction isn't a scalar (SH) operation.
6312 switch (UserOpc) {
6313 case X86::VADDSHZrr_Int:
6314 case X86::VCMPSHZrr_Int:
6315 case X86::VDIVSHZrr_Int:
6316 case X86::VMAXSHZrr_Int:
6317 case X86::VMINSHZrr_Int:
6318 case X86::VMULSHZrr_Int:
6319 case X86::VSUBSHZrr_Int:
6320 case X86::VADDSHZrr_Intk: case X86::VADDSHZrr_Intkz:
6321 case X86::VCMPSHZrr_Intk:
6322 case X86::VDIVSHZrr_Intk: case X86::VDIVSHZrr_Intkz:
6323 case X86::VMAXSHZrr_Intk: case X86::VMAXSHZrr_Intkz:
6324 case X86::VMINSHZrr_Intk: case X86::VMINSHZrr_Intkz:
6325 case X86::VMULSHZrr_Intk: case X86::VMULSHZrr_Intkz:
6326 case X86::VSUBSHZrr_Intk: case X86::VSUBSHZrr_Intkz:
6327 case X86::VFMADD132SHZr_Int: case X86::VFNMADD132SHZr_Int:
6328 case X86::VFMADD213SHZr_Int: case X86::VFNMADD213SHZr_Int:
6329 case X86::VFMADD231SHZr_Int: case X86::VFNMADD231SHZr_Int:
6330 case X86::VFMSUB132SHZr_Int: case X86::VFNMSUB132SHZr_Int:
6331 case X86::VFMSUB213SHZr_Int: case X86::VFNMSUB213SHZr_Int:
6332 case X86::VFMSUB231SHZr_Int: case X86::VFNMSUB231SHZr_Int:
6333 case X86::VFMADD132SHZr_Intk: case X86::VFNMADD132SHZr_Intk:
6334 case X86::VFMADD213SHZr_Intk: case X86::VFNMADD213SHZr_Intk:
6335 case X86::VFMADD231SHZr_Intk: case X86::VFNMADD231SHZr_Intk:
6336 case X86::VFMSUB132SHZr_Intk: case X86::VFNMSUB132SHZr_Intk:
6337 case X86::VFMSUB213SHZr_Intk: case X86::VFNMSUB213SHZr_Intk:
6338 case X86::VFMSUB231SHZr_Intk: case X86::VFNMSUB231SHZr_Intk:
6339 case X86::VFMADD132SHZr_Intkz: case X86::VFNMADD132SHZr_Intkz:
6340 case X86::VFMADD213SHZr_Intkz: case X86::VFNMADD213SHZr_Intkz:
6341 case X86::VFMADD231SHZr_Intkz: case X86::VFNMADD231SHZr_Intkz:
6342 case X86::VFMSUB132SHZr_Intkz: case X86::VFNMSUB132SHZr_Intkz:
6343 case X86::VFMSUB213SHZr_Intkz: case X86::VFNMSUB213SHZr_Intkz:
6344 case X86::VFMSUB231SHZr_Intkz: case X86::VFNMSUB231SHZr_Intkz:
6345 return false;
6346 default:
6347 return true;
6348 }
6349 }
6350
6351 return false;
6352}
6353
6354MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
6355 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
6356 MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
6357 LiveIntervals *LIS) const {
6358
6359 // TODO: Support the case where LoadMI loads a wide register, but MI
6360 // only uses a subreg.
6361 for (auto Op : Ops) {
6362 if (MI.getOperand(Op).getSubReg())
6363 return nullptr;
6364 }
6365
6366 // If loading from a FrameIndex, fold directly from the FrameIndex.
6367 unsigned NumOps = LoadMI.getDesc().getNumOperands();
6368 int FrameIndex;
6369 if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
6370 if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
6371 return nullptr;
6372 return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS);
6373 }
6374
6375 // Check switch flag
6376 if (NoFusing) return nullptr;
6377
6378 // Avoid partial and undef register update stalls unless optimizing for size.
6379 if (!MF.getFunction().hasOptSize() &&
6380 (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
6381 shouldPreventUndefRegUpdateMemFold(MF, MI)))
6382 return nullptr;
6383
6384 // Determine the alignment of the load.
6385 Align Alignment;
6386 if (LoadMI.hasOneMemOperand())
6387 Alignment = (*LoadMI.memoperands_begin())->getAlign();
6388 else
6389 switch (LoadMI.getOpcode()) {
6390 case X86::AVX512_512_SET0:
6391 case X86::AVX512_512_SETALLONES:
6392 Alignment = Align(64);
6393 break;
6394 case X86::AVX2_SETALLONES:
6395 case X86::AVX1_SETALLONES:
6396 case X86::AVX_SET0:
6397 case X86::AVX512_256_SET0:
6398 Alignment = Align(32);
6399 break;
6400 case X86::V_SET0:
6401 case X86::V_SETALLONES:
6402 case X86::AVX512_128_SET0:
6403 case X86::FsFLD0F128:
6404 case X86::AVX512_FsFLD0F128:
6405 Alignment = Align(16);
6406 break;
6407 case X86::MMX_SET0:
6408 case X86::FsFLD0SD:
6409 case X86::AVX512_FsFLD0SD:
6410 Alignment = Align(8);
6411 break;
6412 case X86::FsFLD0SS:
6413 case X86::AVX512_FsFLD0SS:
6414 Alignment = Align(4);
6415 break;
6416 case X86::AVX512_FsFLD0SH:
6417 Alignment = Align(2);
6418 break;
6419 default:
6420 return nullptr;
6421 }
6422 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
6423 unsigned NewOpc = 0;
6424 switch (MI.getOpcode()) {
6425 default: return nullptr;
6426 case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
6427 case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
6428 case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
6429 case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
6430 }
6431 // Change to CMPXXri r, 0 first.
6432 MI.setDesc(get(NewOpc));
6433 MI.getOperand(1).ChangeToImmediate(0);
6434 } else if (Ops.size() != 1)
6435 return nullptr;
6436
6437 // Make sure the subregisters match.
6438 // Otherwise we risk changing the size of the load.
6439 if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg())
6440 return nullptr;
6441
6442 SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
6443 switch (LoadMI.getOpcode()) {
6444 case X86::MMX_SET0:
6445 case X86::V_SET0:
6446 case X86::V_SETALLONES:
6447 case X86::AVX2_SETALLONES:
6448 case X86::AVX1_SETALLONES:
6449 case X86::AVX_SET0:
6450 case X86::AVX512_128_SET0:
6451 case X86::AVX512_256_SET0:
6452 case X86::AVX512_512_SET0:
6453 case X86::AVX512_512_SETALLONES:
6454 case X86::AVX512_FsFLD0SH:
6455 case X86::FsFLD0SD:
6456 case X86::AVX512_FsFLD0SD:
6457 case X86::FsFLD0SS:
6458 case X86::AVX512_FsFLD0SS:
6459 case X86::FsFLD0F128:
6460 case X86::AVX512_FsFLD0F128: {
6461 // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
6462 // Create a constant-pool entry and operands to load from it.
6463
6464     // The medium and large code models can't fold loads this way.
6465 if (MF.getTarget().getCodeModel() != CodeModel::Small &&
6466 MF.getTarget().getCodeModel() != CodeModel::Kernel)
6467 return nullptr;
6468
6469 // x86-32 PIC requires a PIC base register for constant pools.
6470 unsigned PICBase = 0;
6471 // Since we're using Small or Kernel code model, we can always use
6472 // RIP-relative addressing for a smaller encoding.
6473 if (Subtarget.is64Bit()) {
6474 PICBase = X86::RIP;
6475 } else if (MF.getTarget().isPositionIndependent()) {
6476 // FIXME: PICBase = getGlobalBaseReg(&MF);
6477 // This doesn't work for several reasons.
6478 // 1. GlobalBaseReg may have been spilled.
6479 // 2. It may not be live at MI.
6480 return nullptr;
6481 }
6482
6483 // Create a constant-pool entry.
6484 MachineConstantPool &MCP = *MF.getConstantPool();
6485 Type *Ty;
6486 unsigned Opc = LoadMI.getOpcode();
6487 if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS)
6488 Ty = Type::getFloatTy(MF.getFunction().getContext());
6489 else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
6490 Ty = Type::getDoubleTy(MF.getFunction().getContext());
6491 else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
6492 Ty = Type::getFP128Ty(MF.getFunction().getContext());
6493 else if (Opc == X86::AVX512_FsFLD0SH)
6494 Ty = Type::getHalfTy(MF.getFunction().getContext());
6495 else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
6496 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
6497 16);
6498 else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
6499 Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
6500 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
6501 8);
6502 else if (Opc == X86::MMX_SET0)
6503 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
6504 2);
6505 else
6506 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
6507 4);
6508
6509 bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
6510 Opc == X86::AVX512_512_SETALLONES ||
6511 Opc == X86::AVX1_SETALLONES);
6512 const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
6513 Constant::getNullValue(Ty);
6514 unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
6515
6516 // Create operands to load from the constant pool entry.
6517 MOs.push_back(MachineOperand::CreateReg(PICBase, false));
6518 MOs.push_back(MachineOperand::CreateImm(1));
6519 MOs.push_back(MachineOperand::CreateReg(0, false));
6520 MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
6521 MOs.push_back(MachineOperand::CreateReg(0, false));
6522 break;
6523 }
6524 default: {
6525 if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
6526 return nullptr;
6527
6528 // Folding a normal load. Just copy the load's address operands.
6529 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands,
6530 LoadMI.operands_begin() + NumOps);
6531 break;
6532 }
6533 }
6534 return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
6535 /*Size=*/0, Alignment, /*AllowCommute=*/true);
6536}
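For the V_SET0 / V_SETALLONES cases the "load" being folded is only a zeroing or all-ones idiom, so the code manufactures a constant-pool entry and folds a load from it instead. On x86-64 with the small code model that comes out roughly as (illustrative sketch; the label name is hypothetical):

      //   xorps %xmm0, %xmm0              ; V_SET0
      //   addps %xmm0, %xmm1
      // becomes
      //   addps .LCPI0_0(%rip), %xmm1     ; .LCPI0_0 = 16 bytes of zeros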
6537
6538static SmallVector<MachineMemOperand *, 2>
6539extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
6540 SmallVector<MachineMemOperand *, 2> LoadMMOs;
6541
6542 for (MachineMemOperand *MMO : MMOs) {
6543 if (!MMO->isLoad())
6544 continue;
6545
6546 if (!MMO->isStore()) {
6547 // Reuse the MMO.
6548 LoadMMOs.push_back(MMO);
6549 } else {
6550 // Clone the MMO and unset the store flag.
6551 LoadMMOs.push_back(MF.getMachineMemOperand(
6552 MMO, MMO->getFlags() & ~MachineMemOperand::MOStore));
6553 }
6554 }
6555
6556 return LoadMMOs;
6557}
6558
6559static SmallVector<MachineMemOperand *, 2>
6560extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
6561 SmallVector<MachineMemOperand *, 2> StoreMMOs;
6562
6563 for (MachineMemOperand *MMO : MMOs) {
6564 if (!MMO->isStore())
6565 continue;
6566
6567 if (!MMO->isLoad()) {
6568 // Reuse the MMO.
6569 StoreMMOs.push_back(MMO);
6570 } else {
6571 // Clone the MMO and unset the load flag.
6572 StoreMMOs.push_back(MF.getMachineMemOperand(
6573 MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad));
6574 }
6575 }
6576
6577 return StoreMMOs;
6578}
6579
6580static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
6581 const TargetRegisterClass *RC,
6582 const X86Subtarget &STI) {
6583   assert(STI.hasAVX512() && "Expected at least AVX512!");
6584 unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC);
6585   assert((SpillSize == 64 || STI.hasVLX()) &&
6586          "Can't broadcast less than 64 bytes without AVX512VL!");
6587
6588 switch (I->Flags & TB_BCAST_MASK) {
6589   default: llvm_unreachable("Unexpected broadcast type!");
6590 case TB_BCAST_D:
6591 switch (SpillSize) {
6592     default: llvm_unreachable("Unknown spill size");
6593 case 16: return X86::VPBROADCASTDZ128rm;
6594 case 32: return X86::VPBROADCASTDZ256rm;
6595 case 64: return X86::VPBROADCASTDZrm;
6596 }
6597 break;
6598 case TB_BCAST_Q:
6599 switch (SpillSize) {
6600     default: llvm_unreachable("Unknown spill size");
6601 case 16: return X86::VPBROADCASTQZ128rm;
6602 case 32: return X86::VPBROADCASTQZ256rm;
6603 case 64: return X86::VPBROADCASTQZrm;
6604 }
6605 break;
6606 case TB_BCAST_SS:
6607 switch (SpillSize) {
6608     default: llvm_unreachable("Unknown spill size");
6609 case 16: return X86::VBROADCASTSSZ128rm;
6610 case 32: return X86::VBROADCASTSSZ256rm;
6611 case 64: return X86::VBROADCASTSSZrm;
6612 }
6613 break;
6614 case TB_BCAST_SD:
6615 switch (SpillSize) {
6616     default: llvm_unreachable("Unknown spill size");
6617 case 16: return X86::VMOVDDUPZ128rm;
6618 case 32: return X86::VBROADCASTSDZ256rm;
6619 case 64: return X86::VBROADCASTSDZrm;
6620 }
6621 break;
6622 }
6623}
6624
6625bool X86InstrInfo::unfoldMemoryOperand(
6626 MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
6627 bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
6628 const X86MemoryFoldTableEntry *I = lookupUnfoldTable(MI.getOpcode());
6629 if (I == nullptr)
6630 return false;
6631 unsigned Opc = I->DstOp;
6632 unsigned Index = I->Flags & TB_INDEX_MASK;
6633 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
6634 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
6635 bool FoldedBCast = I->Flags & TB_FOLDED_BCAST;
6636 if (UnfoldLoad && !FoldedLoad)
6637 return false;
6638 UnfoldLoad &= FoldedLoad;
6639 if (UnfoldStore && !FoldedStore)
6640 return false;
6641 UnfoldStore &= FoldedStore;
6642
6643 const MCInstrDesc &MCID = get(Opc);
6644
6645 const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
6646 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
6647 // TODO: Check if 32-byte or greater accesses are slow too?
6648 if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
6649 Subtarget.isUnalignedMem16Slow())
6650 // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
6651 // conservatively assume the address is unaligned. That's bad for
6652 // performance.
6653 return false;
6654 SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
6655 SmallVector<MachineOperand,2> BeforeOps;
6656 SmallVector<MachineOperand,2> AfterOps;
6657 SmallVector<MachineOperand,4> ImpOps;
6658 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6659 MachineOperand &Op = MI.getOperand(i);
6660 if (i >= Index && i < Index + X86::AddrNumOperands)
6661 AddrOps.push_back(Op);
6662 else if (Op.isReg() && Op.isImplicit())
6663 ImpOps.push_back(Op);
6664 else if (i < Index)
6665 BeforeOps.push_back(Op);
6666 else if (i > Index)
6667 AfterOps.push_back(Op);
6668 }
6669
6670 // Emit the load or broadcast instruction.
6671 if (UnfoldLoad) {
6672 auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
6673
6674 unsigned Opc;
6675 if (FoldedBCast) {
6676 Opc = getBroadcastOpcode(I, RC, Subtarget);
6677 } else {
6678 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
6679 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
6680 Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget);
6681 }
6682
6683 DebugLoc DL;
6684 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg);
6685 for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
6686 MIB.add(AddrOps[i]);
6687 MIB.setMemRefs(MMOs);
6688 NewMIs.push_back(MIB);
6689
6690 if (UnfoldStore) {
6691 // Address operands cannot be marked isKill.
6692 for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
6693 MachineOperand &MO = NewMIs[0]->getOperand(i);
6694 if (MO.isReg())
6695 MO.setIsKill(false);
6696 }
6697 }
6698 }
6699
6700 // Emit the data processing instruction.
6701 MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI.getDebugLoc(), true);
6702 MachineInstrBuilder MIB(MF, DataMI);
6703
6704 if (FoldedStore)
6705 MIB.addReg(Reg, RegState::Define);
6706 for (MachineOperand &BeforeOp : BeforeOps)
6707 MIB.add(BeforeOp);
6708 if (FoldedLoad)
6709 MIB.addReg(Reg);
6710 for (MachineOperand &AfterOp : AfterOps)
6711 MIB.add(AfterOp);
6712 for (MachineOperand &ImpOp : ImpOps) {
6713 MIB.addReg(ImpOp.getReg(),
6714 getDefRegState(ImpOp.isDef()) |
6715 RegState::Implicit |
6716 getKillRegState(ImpOp.isKill()) |
6717 getDeadRegState(ImpOp.isDead()) |
6718 getUndefRegState(ImpOp.isUndef()));
6719 }
6720 // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
6721 switch (DataMI->getOpcode()) {
6722 default: break;
6723 case X86::CMP64ri32:
6724 case X86::CMP64ri8:
6725 case X86::CMP32ri:
6726 case X86::CMP32ri8:
6727 case X86::CMP16ri:
6728 case X86::CMP16ri8:
6729 case X86::CMP8ri: {
6730 MachineOperand &MO0 = DataMI->getOperand(0);
6731 MachineOperand &MO1 = DataMI->getOperand(1);
6732 if (MO1.isImm() && MO1.getImm() == 0) {
6733 unsigned NewOpc;
6734 switch (DataMI->getOpcode()) {
6735       default: llvm_unreachable("Unreachable!");
6736 case X86::CMP64ri8:
6737 case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
6738 case X86::CMP32ri8:
6739 case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
6740 case X86::CMP16ri8:
6741 case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
6742 case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
6743 }
6744 DataMI->setDesc(get(NewOpc));
6745 MO1.ChangeToRegister(MO0.getReg(), false);
6746 }
6747 }
6748 }
6749 NewMIs.push_back(DataMI);
6750
6751 // Emit the store instruction.
6752 if (UnfoldStore) {
6753 const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
6754 auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
6755 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
6756 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
6757 unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
6758 DebugLoc DL;
6759 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
6760 for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
6761 MIB.add(AddrOps[i]);
6762 MIB.addReg(Reg, RegState::Kill);
6763 MIB.setMemRefs(MMOs);
6764 NewMIs.push_back(MIB);
6765 }
6766
6767 return true;
6768}
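Unfolding is the reverse of the folds above: the memory reference is split back out into a separate load (and, when a store was folded, a separate store) around the register form taken from the unfold table. Schematically (an illustrative sketch in MIR-like syntax, not from the file; registers are arbitrary):

      //   %eax = ADD32rm %eax(tied), %rdi, 1, $noreg, 0, $noreg
      // unfolds into
      //   %ecx = MOV32rm %rdi, 1, $noreg, 0, $noreg
      //   %eax = ADD32rr %eax(tied), %ecx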
6769
6770bool
6771X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
6772 SmallVectorImpl<SDNode*> &NewNodes) const {
6773 if (!N->isMachineOpcode())
6774 return false;
6775
6776 const X86MemoryFoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode());
6777 if (I == nullptr)
6778 return false;
6779 unsigned Opc = I->DstOp;
6780 unsigned Index = I->Flags & TB_INDEX_MASK;
6781 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
6782 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
6783 bool FoldedBCast = I->Flags & TB_FOLDED_BCAST;
6784 const MCInstrDesc &MCID = get(Opc);
6785 MachineFunction &MF = DAG.getMachineFunction();
6786 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
6787 const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
6788 unsigned NumDefs = MCID.NumDefs;
6789 std::vector<SDValue> AddrOps;
6790 std::vector<SDValue> BeforeOps;
6791 std::vector<SDValue> AfterOps;
6792 SDLoc dl(N);
6793 unsigned NumOps = N->getNumOperands();
6794 for (unsigned i = 0; i != NumOps-1; ++i) {
6795 SDValue Op = N->getOperand(i);
6796 if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
6797 AddrOps.push_back(Op);
6798 else if (i < Index-NumDefs)
6799 BeforeOps.push_back(Op);
6800 else if (i > Index-NumDefs)
6801 AfterOps.push_back(Op);
6802 }
6803 SDValue Chain = N->getOperand(NumOps-1);
6804 AddrOps.push_back(Chain);
6805
6806 // Emit the load instruction.
6807 SDNode *Load = nullptr;
6808 if (FoldedLoad) {
6809 EVT VT = *TRI.legalclasstypes_begin(*RC);
6810 auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
6811 if (MMOs.empty() && RC == &X86::VR128RegClass &&
6812 Subtarget.isUnalignedMem16Slow())
6813 // Do not introduce a slow unaligned load.
6814 return false;
6815 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
6816 // memory access is slow above.
6817
6818 unsigned Opc;
6819 if (FoldedBCast) {
6820 Opc = getBroadcastOpcode(I, RC, Subtarget);
6821 } else {
6822 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
6823 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
6824 Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget);
6825 }
6826
6827 Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps);
6828 NewNodes.push_back(Load);
6829
6830 // Preserve memory reference information.
6831 DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
6832 }
6833
6834 // Emit the data processing instruction.
6835 std::vector<EVT> VTs;
6836 const TargetRegisterClass *DstRC = nullptr;
6837 if (MCID.getNumDefs() > 0) {
6838 DstRC = getRegClass(MCID, 0, &RI, MF);
6839 VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
6840 }
6841 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
6842 EVT VT = N->getValueType(i);
6843 if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
6844 VTs.push_back(VT);
6845 }
6846 if (Load)
6847 BeforeOps.push_back(SDValue(Load, 0));
6848 llvm::append_range(BeforeOps, AfterOps);
6849 // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
6850 switch (Opc) {
6851 default: break;
6852 case X86::CMP64ri32:
6853 case X86::CMP64ri8:
6854 case X86::CMP32ri:
6855 case X86::CMP32ri8:
6856 case X86::CMP16ri:
6857 case X86::CMP16ri8:
6858 case X86::CMP8ri:
6859 if (isNullConstant(BeforeOps[1])) {
6860 switch (Opc) {
6861       default: llvm_unreachable("Unreachable!");
6862 case X86::CMP64ri8:
6863 case X86::CMP64ri32: Opc = X86::TEST64rr; break;
6864 case X86::CMP32ri8:
6865 case X86::CMP32ri: Opc = X86::TEST32rr; break;
6866 case X86::CMP16ri8:
6867 case X86::CMP16ri: Opc = X86::TEST16rr; break;
6868 case X86::CMP8ri: Opc = X86::TEST8rr; break;
6869 }
6870 BeforeOps[1] = BeforeOps[0];
6871 }
6872 }
6873 SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
6874 NewNodes.push_back(NewNode);
6875
6876 // Emit the store instruction.
6877 if (FoldedStore) {
6878 AddrOps.pop_back();
6879 AddrOps.push_back(SDValue(NewNode, 0));
6880 AddrOps.push_back(Chain);
6881 auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
6882 if (MMOs.empty() && RC == &X86::VR128RegClass &&
6883 Subtarget.isUnalignedMem16Slow())
6884 // Do not introduce a slow unaligned store.
6885 return false;
6886 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
6887 // memory access is slow above.
6888 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
6889 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
6890 SDNode *Store =
6891 DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
6892 dl, MVT::Other, AddrOps);
6893 NewNodes.push_back(Store);
6894
6895 // Preserve memory reference information.
6896 DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
6897 }
6898
6899 return true;
6900}
6901
6902unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
6903 bool UnfoldLoad, bool UnfoldStore,
6904 unsigned *LoadRegIndex) const {
6905 const X86MemoryFoldTableEntry *I = lookupUnfoldTable(Opc);
6906 if (I == nullptr)
6907 return 0;
6908 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
6909 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
6910 if (UnfoldLoad && !FoldedLoad)
6911 return 0;
6912 if (UnfoldStore && !FoldedStore)
6913 return 0;
6914 if (LoadRegIndex)
6915 *LoadRegIndex = I->Flags & TB_INDEX_MASK;
6916 return I->DstOp;
6917}
6918
6919bool
6920X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
6921 int64_t &Offset1, int64_t &Offset2) const {
6922 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
6923 return false;
6924 unsigned Opc1 = Load1->getMachineOpcode();
6925 unsigned Opc2 = Load2->getMachineOpcode();
6926 switch (Opc1) {
6927 default: return false;
6928 case X86::MOV8rm:
6929 case X86::MOV16rm:
6930 case X86::MOV32rm:
6931 case X86::MOV64rm:
6932 case X86::LD_Fp32m:
6933 case X86::LD_Fp64m:
6934 case X86::LD_Fp80m:
6935 case X86::MOVSSrm:
6936 case X86::MOVSSrm_alt:
6937 case X86::MOVSDrm:
6938 case X86::MOVSDrm_alt:
6939 case X86::MMX_MOVD64rm:
6940 case X86::MMX_MOVQ64rm:
6941 case X86::MOVAPSrm:
6942 case X86::MOVUPSrm:
6943 case X86::MOVAPDrm:
6944 case X86::MOVUPDrm:
6945 case X86::MOVDQArm:
6946 case X86::MOVDQUrm:
6947 // AVX load instructions
6948 case X86::VMOVSSrm:
6949 case X86::VMOVSSrm_alt:
6950 case X86::VMOVSDrm:
6951 case X86::VMOVSDrm_alt:
6952 case X86::VMOVAPSrm:
6953 case X86::VMOVUPSrm:
6954 case X86::VMOVAPDrm:
6955 case X86::VMOVUPDrm:
6956 case X86::VMOVDQArm:
6957 case X86::VMOVDQUrm:
6958 case X86::VMOVAPSYrm:
6959 case X86::VMOVUPSYrm:
6960 case X86::VMOVAPDYrm:
6961 case X86::VMOVUPDYrm:
6962 case X86::VMOVDQAYrm:
6963 case X86::VMOVDQUYrm:
6964 // AVX512 load instructions
6965 case X86::VMOVSSZrm:
6966 case X86::VMOVSSZrm_alt:
6967 case X86::VMOVSDZrm:
6968 case X86::VMOVSDZrm_alt:
6969 case X86::VMOVAPSZ128rm:
6970 case X86::VMOVUPSZ128rm:
6971 case X86::VMOVAPSZ128rm_NOVLX:
6972 case X86::VMOVUPSZ128rm_NOVLX:
6973 case X86::VMOVAPDZ128rm:
6974 case X86::VMOVUPDZ128rm:
6975 case X86::VMOVDQU8Z128rm:
6976 case X86::VMOVDQU16Z128rm:
6977 case X86::VMOVDQA32Z128rm:
6978 case X86::VMOVDQU32Z128rm:
6979 case X86::VMOVDQA64Z128rm:
6980 case X86::VMOVDQU64Z128rm:
6981 case X86::VMOVAPSZ256rm:
6982 case X86::VMOVUPSZ256rm:
6983 case X86::VMOVAPSZ256rm_NOVLX:
6984 case X86::VMOVUPSZ256rm_NOVLX:
6985 case X86::VMOVAPDZ256rm:
6986 case X86::VMOVUPDZ256rm:
6987 case X86::VMOVDQU8Z256rm:
6988 case X86::VMOVDQU16Z256rm:
6989 case X86::VMOVDQA32Z256rm:
6990 case X86::VMOVDQU32Z256rm:
6991 case X86::VMOVDQA64Z256rm:
6992 case X86::VMOVDQU64Z256rm:
6993 case X86::VMOVAPSZrm:
6994 case X86::VMOVUPSZrm:
6995 case X86::VMOVAPDZrm:
6996 case X86::VMOVUPDZrm:
6997 case X86::VMOVDQU8Zrm:
6998 case X86::VMOVDQU16Zrm:
6999 case X86::VMOVDQA32Zrm:
7000 case X86::VMOVDQU32Zrm:
7001 case X86::VMOVDQA64Zrm:
7002 case X86::VMOVDQU64Zrm:
7003 case X86::KMOVBkm:
7004 case X86::KMOVWkm:
7005 case X86::KMOVDkm:
7006 case X86::KMOVQkm:
7007 break;
7008 }
7009 switch (Opc2) {
7010 default: return false;
7011 case X86::MOV8rm:
7012 case X86::MOV16rm:
7013 case X86::MOV32rm:
7014 case X86::MOV64rm:
7015 case X86::LD_Fp32m:
7016 case X86::LD_Fp64m:
7017 case X86::LD_Fp80m:
7018 case X86::MOVSSrm:
7019 case X86::MOVSSrm_alt:
7020 case X86::MOVSDrm:
7021 case X86::MOVSDrm_alt:
7022 case X86::MMX_MOVD64rm:
7023 case X86::MMX_MOVQ64rm:
7024 case X86::MOVAPSrm:
7025 case X86::MOVUPSrm:
7026 case X86::MOVAPDrm:
7027 case X86::MOVUPDrm:
7028 case X86::MOVDQArm:
7029 case X86::MOVDQUrm:
7030 // AVX load instructions
7031 case X86::VMOVSSrm:
7032 case X86::VMOVSSrm_alt:
7033 case X86::VMOVSDrm:
7034 case X86::VMOVSDrm_alt:
7035 case X86::VMOVAPSrm:
7036 case X86::VMOVUPSrm:
7037 case X86::VMOVAPDrm:
7038 case X86::VMOVUPDrm:
7039 case X86::VMOVDQArm:
7040 case X86::VMOVDQUrm:
7041 case X86::VMOVAPSYrm:
7042 case X86::VMOVUPSYrm:
7043 case X86::VMOVAPDYrm:
7044 case X86::VMOVUPDYrm:
7045 case X86::VMOVDQAYrm:
7046 case X86::VMOVDQUYrm:
7047 // AVX512 load instructions
7048 case X86::VMOVSSZrm:
7049 case X86::VMOVSSZrm_alt:
7050 case X86::VMOVSDZrm:
7051 case X86::VMOVSDZrm_alt:
7052 case X86::VMOVAPSZ128rm:
7053 case X86::VMOVUPSZ128rm:
7054 case X86::VMOVAPSZ128rm_NOVLX:
7055 case X86::VMOVUPSZ128rm_NOVLX:
7056 case X86::VMOVAPDZ128rm:
7057 case X86::VMOVUPDZ128rm:
7058 case X86::VMOVDQU8Z128rm:
7059 case X86::VMOVDQU16Z128rm:
7060 case X86::VMOVDQA32Z128rm:
7061 case X86::VMOVDQU32Z128rm:
7062 case X86::VMOVDQA64Z128rm:
7063 case X86::VMOVDQU64Z128rm:
7064 case X86::VMOVAPSZ256rm:
7065 case X86::VMOVUPSZ256rm:
7066 case X86::VMOVAPSZ256rm_NOVLX:
7067 case X86::VMOVUPSZ256rm_NOVLX:
7068 case X86::VMOVAPDZ256rm:
7069 case X86::VMOVUPDZ256rm:
7070 case X86::VMOVDQU8Z256rm:
7071 case X86::VMOVDQU16Z256rm:
7072 case X86::VMOVDQA32Z256rm:
7073 case X86::VMOVDQU32Z256rm:
7074 case X86::VMOVDQA64Z256rm:
7075 case X86::VMOVDQU64Z256rm:
7076 case X86::VMOVAPSZrm:
7077 case X86::VMOVUPSZrm:
7078 case X86::VMOVAPDZrm:
7079 case X86::VMOVUPDZrm:
7080 case X86::VMOVDQU8Zrm:
7081 case X86::VMOVDQU16Zrm:
7082 case X86::VMOVDQA32Zrm:
7083 case X86::VMOVDQU32Zrm:
7084 case X86::VMOVDQA64Zrm:
7085 case X86::VMOVDQU64Zrm:
7086 case X86::KMOVBkm:
7087 case X86::KMOVWkm:
7088 case X86::KMOVDkm:
7089 case X86::KMOVQkm:
7090 break;
7091 }
7092
7093 // Lambda to check if both the loads have the same value for an operand index.
7094 auto HasSameOp = [&](int I) {
7095 return Load1->getOperand(I) == Load2->getOperand(I);
7096 };
7097
7098 // All operands except the displacement should match.
7099 if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) ||
7100 !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg))
7101 return false;
7102
7103 // Chain Operand must be the same.
7104 if (!HasSameOp(5))
7105 return false;
7106
7107 // Now let's examine if the displacements are constants.
7108 auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp));
7109 auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp));
7110 if (!Disp1 || !Disp2)
7111 return false;
7112
7113 Offset1 = Disp1->getSExtValue();
7114 Offset2 = Disp2->getSExtValue();
7115 return true;
7116}
7117
7118bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
7119 int64_t Offset1, int64_t Offset2,
7120 unsigned NumLoads) const {
7121  assert(Offset2 > Offset1);
7122 if ((Offset2 - Offset1) / 8 > 64)
7123 return false;
7124
7125 unsigned Opc1 = Load1->getMachineOpcode();
7126 unsigned Opc2 = Load2->getMachineOpcode();
7127 if (Opc1 != Opc2)
7128 return false; // FIXME: overly conservative?
7129
7130 switch (Opc1) {
7131 default: break;
7132 case X86::LD_Fp32m:
7133 case X86::LD_Fp64m:
7134 case X86::LD_Fp80m:
7135 case X86::MMX_MOVD64rm:
7136 case X86::MMX_MOVQ64rm:
7137 return false;
7138 }
7139
7140 EVT VT = Load1->getValueType(0);
7141 switch (VT.getSimpleVT().SimpleTy) {
7142 default:
7143 // XMM registers. In 64-bit mode we can be a bit more aggressive since we
7144 // have 16 of them to play with.
7145 if (Subtarget.is64Bit()) {
7146 if (NumLoads >= 3)
7147 return false;
7148 } else if (NumLoads) {
7149 return false;
7150 }
7151 break;
7152 case MVT::i8:
7153 case MVT::i16:
7154 case MVT::i32:
7155 case MVT::i64:
7156 case MVT::f32:
7157 case MVT::f64:
7158 if (NumLoads)
7159 return false;
7160 break;
7161 }
7162
7163 return true;
7164}
7165
7166bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
7167 const MachineBasicBlock *MBB,
7168 const MachineFunction &MF) const {
7169
7170  // ENDBR and LDTILECFG instructions should not be scheduled around.
7171 unsigned Opcode = MI.getOpcode();
7172 if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
7173 Opcode == X86::LDTILECFG)
7174 return true;
7175
7176 return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
7177}
7178
7179bool X86InstrInfo::
7180reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
7181  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
7182 X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
7183 Cond[0].setImm(GetOppositeBranchCondition(CC));
7184 return false;
7185}
7186
7187bool X86InstrInfo::
7188isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
7189 // FIXME: Return false for x87 stack register classes for now. We can't
7190 // allow any loads of these registers before FpGet_ST0_80.
7191 return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
7192 RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
7193 RC == &X86::RFP80RegClass);
7194}
7195
7196/// Return a virtual register initialized with the
7197/// global base register value. Output instructions required to
7198/// initialize the register in the function entry block, if necessary.
7199///
7200/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
7201///
7202unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
7203  assert((!Subtarget.is64Bit() ||
7204          MF->getTarget().getCodeModel() == CodeModel::Medium ||
7205          MF->getTarget().getCodeModel() == CodeModel::Large) &&
7206         "X86-64 PIC uses RIP relative addressing");
7207
7208 X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
7209 Register GlobalBaseReg = X86FI->getGlobalBaseReg();
7210 if (GlobalBaseReg != 0)
7211 return GlobalBaseReg;
7212
7213 // Create the register. The code to initialize it is inserted
7214 // later, by the CGBR pass (below).
7215 MachineRegisterInfo &RegInfo = MF->getRegInfo();
7216 GlobalBaseReg = RegInfo.createVirtualRegister(
7217 Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass);
7218 X86FI->setGlobalBaseReg(GlobalBaseReg);
7219 return GlobalBaseReg;
7220}
7221
7222// These are the replaceable SSE instructions. Some of these have Int variants
7223// that we don't include here. We don't want to replace instructions selected
7224// by intrinsics.
7225static const uint16_t ReplaceableInstrs[][3] = {
7226 //PackedSingle PackedDouble PackedInt
7227 { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
7228 { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
7229 { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
7230 { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
7231 { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
7232 { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
7233 { X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr },
7234 { X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
7235 { X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
7236 { X86::MOVSDrm_alt,X86::MOVSDrm_alt,X86::MOVQI2PQIrm },
7237 { X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
7238 { X86::MOVSSrm_alt,X86::MOVSSrm_alt,X86::MOVDI2PDIrm },
7239 { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
7240 { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
7241 { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
7242 { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
7243 { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
7244 { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
7245 { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
7246 { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
7247 { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
7248 { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
7249 { X86::MOVLHPSrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
7250 { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
7251 { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
7252 { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
7253 { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
7254 { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
7255 { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
7256 { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
7257 { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
7258 // AVX 128-bit support
7259 { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
7260 { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
7261 { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
7262 { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
7263 { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
7264 { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
7265 { X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr },
7266 { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
7267 { X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
7268 { X86::VMOVSDrm_alt,X86::VMOVSDrm_alt,X86::VMOVQI2PQIrm },
7269 { X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
7270 { X86::VMOVSSrm_alt,X86::VMOVSSrm_alt,X86::VMOVDI2PDIrm },
7271 { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
7272 { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
7273 { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
7274 { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
7275 { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
7276 { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
7277 { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
7278 { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
7279 { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
7280 { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
7281 { X86::VMOVLHPSrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
7282 { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
7283 { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
7284 { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
7285 { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
7286 { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
7287 { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
7288 { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
7289 { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
7290 // AVX 256-bit support
7291 { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
7292 { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
7293 { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
7294 { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
7295 { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
7296 { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
7297 { X86::VPERMPSYrm, X86::VPERMPSYrm, X86::VPERMDYrm },
7298 { X86::VPERMPSYrr, X86::VPERMPSYrr, X86::VPERMDYrr },
7299 { X86::VPERMPDYmi, X86::VPERMPDYmi, X86::VPERMQYmi },
7300 { X86::VPERMPDYri, X86::VPERMPDYri, X86::VPERMQYri },
7301 // AVX512 support
7302 { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
7303 { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
7304 { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr },
7305 { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
7306 { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
7307 { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
7308 { X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm },
7309 { X86::VMOVSDZrm_alt, X86::VMOVSDZrm_alt, X86::VMOVQI2PQIZrm },
7310 { X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
7311 { X86::VMOVSSZrm_alt, X86::VMOVSSZrm_alt, X86::VMOVDI2PDIZrm },
7312 { X86::VBROADCASTSSZ128rr,X86::VBROADCASTSSZ128rr,X86::VPBROADCASTDZ128rr },
7313 { X86::VBROADCASTSSZ128rm,X86::VBROADCASTSSZ128rm,X86::VPBROADCASTDZ128rm },
7314 { X86::VBROADCASTSSZ256rr,X86::VBROADCASTSSZ256rr,X86::VPBROADCASTDZ256rr },
7315 { X86::VBROADCASTSSZ256rm,X86::VBROADCASTSSZ256rm,X86::VPBROADCASTDZ256rm },
7316 { X86::VBROADCASTSSZrr, X86::VBROADCASTSSZrr, X86::VPBROADCASTDZrr },
7317 { X86::VBROADCASTSSZrm, X86::VBROADCASTSSZrm, X86::VPBROADCASTDZrm },
7318 { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128rr },
7319 { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128rm },
7320 { X86::VBROADCASTSDZ256rr,X86::VBROADCASTSDZ256rr,X86::VPBROADCASTQZ256rr },
7321 { X86::VBROADCASTSDZ256rm,X86::VBROADCASTSDZ256rm,X86::VPBROADCASTQZ256rm },
7322 { X86::VBROADCASTSDZrr, X86::VBROADCASTSDZrr, X86::VPBROADCASTQZrr },
7323 { X86::VBROADCASTSDZrm, X86::VBROADCASTSDZrm, X86::VPBROADCASTQZrm },
7324 { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr },
7325 { X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm },
7326 { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr },
7327 { X86::VINSERTF32x8Zrm, X86::VINSERTF32x8Zrm, X86::VINSERTI32x8Zrm },
7328 { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrr, X86::VINSERTI64x2Zrr },
7329 { X86::VINSERTF64x2Zrm, X86::VINSERTF64x2Zrm, X86::VINSERTI64x2Zrm },
7330 { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrr, X86::VINSERTI64x4Zrr },
7331 { X86::VINSERTF64x4Zrm, X86::VINSERTF64x4Zrm, X86::VINSERTI64x4Zrm },
7332 { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rr,X86::VINSERTI32x4Z256rr },
7333 { X86::VINSERTF32x4Z256rm,X86::VINSERTF32x4Z256rm,X86::VINSERTI32x4Z256rm },
7334 { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rr,X86::VINSERTI64x2Z256rr },
7335 { X86::VINSERTF64x2Z256rm,X86::VINSERTF64x2Z256rm,X86::VINSERTI64x2Z256rm },
7336 { X86::VEXTRACTF32x4Zrr, X86::VEXTRACTF32x4Zrr, X86::VEXTRACTI32x4Zrr },
7337 { X86::VEXTRACTF32x4Zmr, X86::VEXTRACTF32x4Zmr, X86::VEXTRACTI32x4Zmr },
7338 { X86::VEXTRACTF32x8Zrr, X86::VEXTRACTF32x8Zrr, X86::VEXTRACTI32x8Zrr },
7339 { X86::VEXTRACTF32x8Zmr, X86::VEXTRACTF32x8Zmr, X86::VEXTRACTI32x8Zmr },
7340 { X86::VEXTRACTF64x2Zrr, X86::VEXTRACTF64x2Zrr, X86::VEXTRACTI64x2Zrr },
7341 { X86::VEXTRACTF64x2Zmr, X86::VEXTRACTF64x2Zmr, X86::VEXTRACTI64x2Zmr },
7342 { X86::VEXTRACTF64x4Zrr, X86::VEXTRACTF64x4Zrr, X86::VEXTRACTI64x4Zrr },
7343 { X86::VEXTRACTF64x4Zmr, X86::VEXTRACTF64x4Zmr, X86::VEXTRACTI64x4Zmr },
7344 { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTI32x4Z256rr },
7345 { X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr },
7346 { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr },
7347 { X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr },
7348 { X86::VPERMILPSmi, X86::VPERMILPSmi, X86::VPSHUFDmi },
7349 { X86::VPERMILPSri, X86::VPERMILPSri, X86::VPSHUFDri },
7350 { X86::VPERMILPSZ128mi, X86::VPERMILPSZ128mi, X86::VPSHUFDZ128mi },
7351 { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128ri, X86::VPSHUFDZ128ri },
7352 { X86::VPERMILPSZ256mi, X86::VPERMILPSZ256mi, X86::VPSHUFDZ256mi },
7353 { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
7354 { X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi },
7355 { X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri },
7356 { X86::VPERMPSZ256rm, X86::VPERMPSZ256rm, X86::VPERMDZ256rm },
7357 { X86::VPERMPSZ256rr, X86::VPERMPSZ256rr, X86::VPERMDZ256rr },
7358 { X86::VPERMPDZ256mi, X86::VPERMPDZ256mi, X86::VPERMQZ256mi },
7359 { X86::VPERMPDZ256ri, X86::VPERMPDZ256ri, X86::VPERMQZ256ri },
7360 { X86::VPERMPDZ256rm, X86::VPERMPDZ256rm, X86::VPERMQZ256rm },
7361 { X86::VPERMPDZ256rr, X86::VPERMPDZ256rr, X86::VPERMQZ256rr },
7362 { X86::VPERMPSZrm, X86::VPERMPSZrm, X86::VPERMDZrm },
7363 { X86::VPERMPSZrr, X86::VPERMPSZrr, X86::VPERMDZrr },
7364 { X86::VPERMPDZmi, X86::VPERMPDZmi, X86::VPERMQZmi },
7365 { X86::VPERMPDZri, X86::VPERMPDZri, X86::VPERMQZri },
7366 { X86::VPERMPDZrm, X86::VPERMPDZrm, X86::VPERMQZrm },
7367 { X86::VPERMPDZrr, X86::VPERMPDZrr, X86::VPERMQZrr },
7368 { X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm },
7369 { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr },
7370 { X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm },
7371 { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr },
7372 { X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm },
7373 { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
7374 { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
7375 { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
7376 { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
7377 { X86::VMOVLHPSZrr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
7378 { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
7379 { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
7380 { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
7381 { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr },
7382 { X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm },
7383 { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr },
7384 { X86::VUNPCKLPDZrm, X86::VUNPCKLPDZrm, X86::VPUNPCKLQDQZrm },
7385 { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrr, X86::VPUNPCKLQDQZrr },
7386 { X86::VUNPCKHPDZrm, X86::VUNPCKHPDZrm, X86::VPUNPCKHQDQZrm },
7387 { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrr, X86::VPUNPCKHQDQZrr },
7388 { X86::VUNPCKLPSZrm, X86::VUNPCKLPSZrm, X86::VPUNPCKLDQZrm },
7389 { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
7390 { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
7391 { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
7392 { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr },
7393 { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr },
7394};
7395
7396static const uint16_t ReplaceableInstrsAVX2[][3] = {
7397 //PackedSingle PackedDouble PackedInt
7398 { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
7399 { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
7400 { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
7401 { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
7402 { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
7403 { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
7404 { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
7405 { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
7406 { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
7407 { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
7408 { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
7409 { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
7410 { X86::VMOVDDUPrm, X86::VMOVDDUPrm, X86::VPBROADCASTQrm},
7411 { X86::VMOVDDUPrr, X86::VMOVDDUPrr, X86::VPBROADCASTQrr},
7412 { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
7413 { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
7414 { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
7415 { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
7416 { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
7417 { X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri },
7418 { X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
7419 { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
7420 { X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri },
7421 { X86::VUNPCKLPDYrm, X86::VUNPCKLPDYrm, X86::VPUNPCKLQDQYrm },
7422 { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrr, X86::VPUNPCKLQDQYrr },
7423 { X86::VUNPCKHPDYrm, X86::VUNPCKHPDYrm, X86::VPUNPCKHQDQYrm },
7424 { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrr, X86::VPUNPCKHQDQYrr },
7425 { X86::VUNPCKLPSYrm, X86::VUNPCKLPSYrm, X86::VPUNPCKLDQYrm },
7426 { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrr, X86::VPUNPCKLDQYrr },
7427 { X86::VUNPCKHPSYrm, X86::VUNPCKHPSYrm, X86::VPUNPCKHDQYrm },
7428 { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
7429};
7430
7431static const uint16_t ReplaceableInstrsFP[][3] = {
7432 //PackedSingle PackedDouble
7433 { X86::MOVLPSrm, X86::MOVLPDrm, X86::INSTRUCTION_LIST_END },
7434 { X86::MOVHPSrm, X86::MOVHPDrm, X86::INSTRUCTION_LIST_END },
7435 { X86::MOVHPSmr, X86::MOVHPDmr, X86::INSTRUCTION_LIST_END },
7436 { X86::VMOVLPSrm, X86::VMOVLPDrm, X86::INSTRUCTION_LIST_END },
7437 { X86::VMOVHPSrm, X86::VMOVHPDrm, X86::INSTRUCTION_LIST_END },
7438 { X86::VMOVHPSmr, X86::VMOVHPDmr, X86::INSTRUCTION_LIST_END },
7439 { X86::VMOVLPSZ128rm, X86::VMOVLPDZ128rm, X86::INSTRUCTION_LIST_END },
7440 { X86::VMOVHPSZ128rm, X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END },
7441 { X86::VMOVHPSZ128mr, X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END },
7442};
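// Illustrative note: the X86::INSTRUCTION_LIST_END entries in the PackedInt
// column above mark rows that have no integer-domain equivalent. For these
// rows getExecutionDomain() below reports only the two floating point domains
// (validDomains == 0x6), and setExecutionDomain() asserts that only
// PackedSingle or PackedDouble is requested.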
7443
7444static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
7445 //PackedSingle PackedDouble PackedInt
7446 { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
7447 { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
7448 { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
7449 { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
7450};
7451
7452static const uint16_t ReplaceableInstrsAVX512[][4] = {
7453 // Two integer columns for 64-bit and 32-bit elements.
7454 //PackedSingle PackedDouble PackedInt PackedInt
7455 { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
7456 { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
7457 { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
7458 { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
7459 { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
7460 { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
7461 { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
7462 { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
7463 { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
7464 { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
7465 { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr },
7466 { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm },
7467 { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
7468 { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
7469 { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
7470};
7471
7472static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
7473 // Two integer columns for 64-bit and 32-bit elements.
7474 //PackedSingle PackedDouble PackedInt PackedInt
7475 { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
7476 { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
7477 { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
7478 { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
7479 { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm },
7480 { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr },
7481 { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
7482 { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
7483 { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
7484 { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
7485 { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
7486 { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
7487 { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm },
7488 { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr },
7489 { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
7490 { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
7491 { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm },
7492 { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr },
7493 { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm },
7494 { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr },
7495 { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm },
7496 { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
7497 { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
7498 { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
7499};
7500
7501static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
7502 // Two integer columns for 64-bit and 32-bit elements.
7503 //PackedSingle PackedDouble
7504 //PackedInt PackedInt
7505 { X86::VANDNPSZ128rmk, X86::VANDNPDZ128rmk,
7506 X86::VPANDNQZ128rmk, X86::VPANDNDZ128rmk },
7507 { X86::VANDNPSZ128rmkz, X86::VANDNPDZ128rmkz,
7508 X86::VPANDNQZ128rmkz, X86::VPANDNDZ128rmkz },
7509 { X86::VANDNPSZ128rrk, X86::VANDNPDZ128rrk,
7510 X86::VPANDNQZ128rrk, X86::VPANDNDZ128rrk },
7511 { X86::VANDNPSZ128rrkz, X86::VANDNPDZ128rrkz,
7512 X86::VPANDNQZ128rrkz, X86::VPANDNDZ128rrkz },
7513 { X86::VANDPSZ128rmk, X86::VANDPDZ128rmk,
7514 X86::VPANDQZ128rmk, X86::VPANDDZ128rmk },
7515 { X86::VANDPSZ128rmkz, X86::VANDPDZ128rmkz,
7516 X86::VPANDQZ128rmkz, X86::VPANDDZ128rmkz },
7517 { X86::VANDPSZ128rrk, X86::VANDPDZ128rrk,
7518 X86::VPANDQZ128rrk, X86::VPANDDZ128rrk },
7519 { X86::VANDPSZ128rrkz, X86::VANDPDZ128rrkz,
7520 X86::VPANDQZ128rrkz, X86::VPANDDZ128rrkz },
7521 { X86::VORPSZ128rmk, X86::VORPDZ128rmk,
7522 X86::VPORQZ128rmk, X86::VPORDZ128rmk },
7523 { X86::VORPSZ128rmkz, X86::VORPDZ128rmkz,
7524 X86::VPORQZ128rmkz, X86::VPORDZ128rmkz },
7525 { X86::VORPSZ128rrk, X86::VORPDZ128rrk,
7526 X86::VPORQZ128rrk, X86::VPORDZ128rrk },
7527 { X86::VORPSZ128rrkz, X86::VORPDZ128rrkz,
7528 X86::VPORQZ128rrkz, X86::VPORDZ128rrkz },
7529 { X86::VXORPSZ128rmk, X86::VXORPDZ128rmk,
7530 X86::VPXORQZ128rmk, X86::VPXORDZ128rmk },
7531 { X86::VXORPSZ128rmkz, X86::VXORPDZ128rmkz,
7532 X86::VPXORQZ128rmkz, X86::VPXORDZ128rmkz },
7533 { X86::VXORPSZ128rrk, X86::VXORPDZ128rrk,
7534 X86::VPXORQZ128rrk, X86::VPXORDZ128rrk },
7535 { X86::VXORPSZ128rrkz, X86::VXORPDZ128rrkz,
7536 X86::VPXORQZ128rrkz, X86::VPXORDZ128rrkz },
7537 { X86::VANDNPSZ256rmk, X86::VANDNPDZ256rmk,
7538 X86::VPANDNQZ256rmk, X86::VPANDNDZ256rmk },
7539 { X86::VANDNPSZ256rmkz, X86::VANDNPDZ256rmkz,
7540 X86::VPANDNQZ256rmkz, X86::VPANDNDZ256rmkz },
7541 { X86::VANDNPSZ256rrk, X86::VANDNPDZ256rrk,
7542 X86::VPANDNQZ256rrk, X86::VPANDNDZ256rrk },
7543 { X86::VANDNPSZ256rrkz, X86::VANDNPDZ256rrkz,
7544 X86::VPANDNQZ256rrkz, X86::VPANDNDZ256rrkz },
7545 { X86::VANDPSZ256rmk, X86::VANDPDZ256rmk,
7546 X86::VPANDQZ256rmk, X86::VPANDDZ256rmk },
7547 { X86::VANDPSZ256rmkz, X86::VANDPDZ256rmkz,
7548 X86::VPANDQZ256rmkz, X86::VPANDDZ256rmkz },
7549 { X86::VANDPSZ256rrk, X86::VANDPDZ256rrk,
7550 X86::VPANDQZ256rrk, X86::VPANDDZ256rrk },
7551 { X86::VANDPSZ256rrkz, X86::VANDPDZ256rrkz,
7552 X86::VPANDQZ256rrkz, X86::VPANDDZ256rrkz },
7553 { X86::VORPSZ256rmk, X86::VORPDZ256rmk,
7554 X86::VPORQZ256rmk, X86::VPORDZ256rmk },
7555 { X86::VORPSZ256rmkz, X86::VORPDZ256rmkz,
7556 X86::VPORQZ256rmkz, X86::VPORDZ256rmkz },
7557 { X86::VORPSZ256rrk, X86::VORPDZ256rrk,
7558 X86::VPORQZ256rrk, X86::VPORDZ256rrk },
7559 { X86::VORPSZ256rrkz, X86::VORPDZ256rrkz,
7560 X86::VPORQZ256rrkz, X86::VPORDZ256rrkz },
7561 { X86::VXORPSZ256rmk, X86::VXORPDZ256rmk,
7562 X86::VPXORQZ256rmk, X86::VPXORDZ256rmk },
7563 { X86::VXORPSZ256rmkz, X86::VXORPDZ256rmkz,
7564 X86::VPXORQZ256rmkz, X86::VPXORDZ256rmkz },
7565 { X86::VXORPSZ256rrk, X86::VXORPDZ256rrk,
7566 X86::VPXORQZ256rrk, X86::VPXORDZ256rrk },
7567 { X86::VXORPSZ256rrkz, X86::VXORPDZ256rrkz,
7568 X86::VPXORQZ256rrkz, X86::VPXORDZ256rrkz },
7569 { X86::VANDNPSZrmk, X86::VANDNPDZrmk,
7570 X86::VPANDNQZrmk, X86::VPANDNDZrmk },
7571 { X86::VANDNPSZrmkz, X86::VANDNPDZrmkz,
7572 X86::VPANDNQZrmkz, X86::VPANDNDZrmkz },
7573 { X86::VANDNPSZrrk, X86::VANDNPDZrrk,
7574 X86::VPANDNQZrrk, X86::VPANDNDZrrk },
7575 { X86::VANDNPSZrrkz, X86::VANDNPDZrrkz,
7576 X86::VPANDNQZrrkz, X86::VPANDNDZrrkz },
7577 { X86::VANDPSZrmk, X86::VANDPDZrmk,
7578 X86::VPANDQZrmk, X86::VPANDDZrmk },
7579 { X86::VANDPSZrmkz, X86::VANDPDZrmkz,
7580 X86::VPANDQZrmkz, X86::VPANDDZrmkz },
7581 { X86::VANDPSZrrk, X86::VANDPDZrrk,
7582 X86::VPANDQZrrk, X86::VPANDDZrrk },
7583 { X86::VANDPSZrrkz, X86::VANDPDZrrkz,
7584 X86::VPANDQZrrkz, X86::VPANDDZrrkz },
7585 { X86::VORPSZrmk, X86::VORPDZrmk,
7586 X86::VPORQZrmk, X86::VPORDZrmk },
7587 { X86::VORPSZrmkz, X86::VORPDZrmkz,
7588 X86::VPORQZrmkz, X86::VPORDZrmkz },
7589 { X86::VORPSZrrk, X86::VORPDZrrk,
7590 X86::VPORQZrrk, X86::VPORDZrrk },
7591 { X86::VORPSZrrkz, X86::VORPDZrrkz,
7592 X86::VPORQZrrkz, X86::VPORDZrrkz },
7593 { X86::VXORPSZrmk, X86::VXORPDZrmk,
7594 X86::VPXORQZrmk, X86::VPXORDZrmk },
7595 { X86::VXORPSZrmkz, X86::VXORPDZrmkz,
7596 X86::VPXORQZrmkz, X86::VPXORDZrmkz },
7597 { X86::VXORPSZrrk, X86::VXORPDZrrk,
7598 X86::VPXORQZrrk, X86::VPXORDZrrk },
7599 { X86::VXORPSZrrkz, X86::VXORPDZrrkz,
7600 X86::VPXORQZrrkz, X86::VPXORDZrrkz },
7601 // Broadcast loads can be handled the same as masked operations to avoid
7602 // changing element size.
7603 { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb,
7604 X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb },
7605 { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb,
7606 X86::VPANDQZ128rmb, X86::VPANDDZ128rmb },
7607 { X86::VORPSZ128rmb, X86::VORPDZ128rmb,
7608 X86::VPORQZ128rmb, X86::VPORDZ128rmb },
7609 { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb,
7610 X86::VPXORQZ128rmb, X86::VPXORDZ128rmb },
7611 { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb,
7612 X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb },
7613 { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb,
7614 X86::VPANDQZ256rmb, X86::VPANDDZ256rmb },
7615 { X86::VORPSZ256rmb, X86::VORPDZ256rmb,
7616 X86::VPORQZ256rmb, X86::VPORDZ256rmb },
7617 { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb,
7618 X86::VPXORQZ256rmb, X86::VPXORDZ256rmb },
7619 { X86::VANDNPSZrmb, X86::VANDNPDZrmb,
7620 X86::VPANDNQZrmb, X86::VPANDNDZrmb },
7621 { X86::VANDPSZrmb, X86::VANDPDZrmb,
7622 X86::VPANDQZrmb, X86::VPANDDZrmb },
7623 { X86::VANDPSZrmb, X86::VANDPDZrmb,
7624 X86::VPANDQZrmb, X86::VPANDDZrmb },
7625 { X86::VORPSZrmb, X86::VORPDZrmb,
7626 X86::VPORQZrmb, X86::VPORDZrmb },
7627 { X86::VXORPSZrmb, X86::VXORPDZrmb,
7628 X86::VPXORQZrmb, X86::VPXORDZrmb },
7629 { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk,
7630 X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk },
7631 { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk,
7632 X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk },
7633 { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk,
7634 X86::VPORQZ128rmbk, X86::VPORDZ128rmbk },
7635 { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk,
7636 X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk },
7637 { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk,
7638 X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk },
7639 { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk,
7640 X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk },
7641 { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk,
7642 X86::VPORQZ256rmbk, X86::VPORDZ256rmbk },
7643 { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk,
7644 X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk },
7645 { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk,
7646 X86::VPANDNQZrmbk, X86::VPANDNDZrmbk },
7647 { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
7648 X86::VPANDQZrmbk, X86::VPANDDZrmbk },
7649 { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
7650 X86::VPANDQZrmbk, X86::VPANDDZrmbk },
7651 { X86::VORPSZrmbk, X86::VORPDZrmbk,
7652 X86::VPORQZrmbk, X86::VPORDZrmbk },
7653 { X86::VXORPSZrmbk, X86::VXORPDZrmbk,
7654 X86::VPXORQZrmbk, X86::VPXORDZrmbk },
7655 { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz,
7656 X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz},
7657 { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz,
7658 X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz },
7659 { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz,
7660 X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz },
7661 { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz,
7662 X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz },
7663 { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz,
7664 X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz},
7665 { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz,
7666 X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz },
7667 { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz,
7668 X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz },
7669 { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz,
7670 X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz },
7671 { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz,
7672 X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz },
7673 { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
7674 X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
7675 { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
7676 X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
7677 { X86::VORPSZrmbkz, X86::VORPDZrmbkz,
7678 X86::VPORQZrmbkz, X86::VPORDZrmbkz },
7679 { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz,
7680 X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
7681};
7682
7683// NOTE: These should only be used by the custom domain methods.
7684static const uint16_t ReplaceableBlendInstrs[][3] = {
7685 //PackedSingle PackedDouble PackedInt
7686 { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi },
7687 { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri },
7688 { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi },
7689 { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri },
7690 { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi },
7691 { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri },
7692};
7693static const uint16_t ReplaceableBlendAVX2Instrs[][3] = {
7694 //PackedSingle PackedDouble PackedInt
7695 { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi },
7696 { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri },
7697 { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi },
7698 { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
7699};
7700
7701// Special table for changing EVEX logic instructions to VEX.
7702// TODO: Should we run EVEX->VEX earlier?
7703static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = {
7704 // Two integer columns for 64-bit and 32-bit elements.
7705 //PackedSingle PackedDouble PackedInt PackedInt
7706 { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
7707 { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
7708 { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
7709 { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
7710 { X86::VORPSrm, X86::VORPDrm, X86::VPORQZ128rm, X86::VPORDZ128rm },
7711 { X86::VORPSrr, X86::VORPDrr, X86::VPORQZ128rr, X86::VPORDZ128rr },
7712 { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
7713 { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
7714 { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
7715 { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
7716 { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
7717 { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
7718 { X86::VORPSYrm, X86::VORPDYrm, X86::VPORQZ256rm, X86::VPORDZ256rm },
7719 { X86::VORPSYrr, X86::VORPDYrr, X86::VPORQZ256rr, X86::VPORDZ256rr },
7720 { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
7721 { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
7722};
7723
7724// FIXME: Some shuffle and unpack instructions have equivalents in different
7725// domains, but they require a bit more work than just switching opcodes.
7726
7727static const uint16_t *lookup(unsigned opcode, unsigned domain,
7728 ArrayRef<uint16_t[3]> Table) {
7729 for (const uint16_t (&Row)[3] : Table)
7730 if (Row[domain-1] == opcode)
7731 return Row;
7732 return nullptr;
7733}
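// Worked example (illustrative): for X86::MOVAPDrr the SSE domain is
// PackedDouble (domain == 2), so lookup() matches the ReplaceableInstrs row
// { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr } via Row[domain - 1] == Row[1].
// Callers can then re-emit the operation in another domain by picking a
// different column of the same row.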
7734
7735static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
7736 ArrayRef<uint16_t[4]> Table) {
7737 // If this is the integer domain make sure to check both integer columns.
7738 for (const uint16_t (&Row)[4] : Table)
7739 if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
7740 return Row;
7741 return nullptr;
7742}
7743
7744// Helper to attempt to widen/narrow blend masks.
7745static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth,
7746 unsigned NewWidth, unsigned *pNewMask = nullptr) {
7747  assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
7748         "Illegal blend mask scale");
7749 unsigned NewMask = 0;
7750
7751 if ((OldWidth % NewWidth) == 0) {
7752 unsigned Scale = OldWidth / NewWidth;
7753 unsigned SubMask = (1u << Scale) - 1;
7754 for (unsigned i = 0; i != NewWidth; ++i) {
7755 unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
7756 if (Sub == SubMask)
7757 NewMask |= (1u << i);
7758 else if (Sub != 0x0)
7759 return false;
7760 }
7761 } else {
7762 unsigned Scale = NewWidth / OldWidth;
7763 unsigned SubMask = (1u << Scale) - 1;
7764 for (unsigned i = 0; i != OldWidth; ++i) {
7765 if (OldMask & (1 << i)) {
7766 NewMask |= (SubMask << (i * Scale));
7767 }
7768 }
7769 }
7770
7771 if (pNewMask)
7772 *pNewMask = NewMask;
7773 return true;
7774}
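// Worked example for AdjustBlendMask (illustrative): converting a 4-lane (PS)
// blend mask to a 2-lane (PD) mask only succeeds when whole lane groups agree:
//   OldMask = 0b1100, OldWidth = 4, NewWidth = 2  ->  NewMask = 0b10
//   OldMask = 0b0110, OldWidth = 4, NewWidth = 2  ->  fails (partial group)
// Going the other way, 0b10 at width 2 scales back up to 0b1100 at width 4,
// so PS<->PD blend immediates round-trip whenever they are convertible at all.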
7775
7776uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
7777 unsigned Opcode = MI.getOpcode();
7778 unsigned NumOperands = MI.getDesc().getNumOperands();
7779
7780 auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
7781 uint16_t validDomains = 0;
7782 if (MI.getOperand(NumOperands - 1).isImm()) {
7783 unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
7784 if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4))
7785 validDomains |= 0x2; // PackedSingle
7786 if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2))
7787 validDomains |= 0x4; // PackedDouble
7788 if (!Is256 || Subtarget.hasAVX2())
7789 validDomains |= 0x8; // PackedInt
7790 }
7791 return validDomains;
7792 };
7793
7794 switch (Opcode) {
7795 case X86::BLENDPDrmi:
7796 case X86::BLENDPDrri:
7797 case X86::VBLENDPDrmi:
7798 case X86::VBLENDPDrri:
7799 return GetBlendDomains(2, false);
7800 case X86::VBLENDPDYrmi:
7801 case X86::VBLENDPDYrri:
7802 return GetBlendDomains(4, true);
7803 case X86::BLENDPSrmi:
7804 case X86::BLENDPSrri:
7805 case X86::VBLENDPSrmi:
7806 case X86::VBLENDPSrri:
7807 case X86::VPBLENDDrmi:
7808 case X86::VPBLENDDrri:
7809 return GetBlendDomains(4, false);
7810 case X86::VBLENDPSYrmi:
7811 case X86::VBLENDPSYrri:
7812 case X86::VPBLENDDYrmi:
7813 case X86::VPBLENDDYrri:
7814 return GetBlendDomains(8, true);
7815 case X86::PBLENDWrmi:
7816 case X86::PBLENDWrri:
7817 case X86::VPBLENDWrmi:
7818 case X86::VPBLENDWrri:
7819 // Treat VPBLENDWY as a 128-bit vector as it repeats the lo/hi masks.
7820 case X86::VPBLENDWYrmi:
7821 case X86::VPBLENDWYrri:
7822 return GetBlendDomains(8, false);
7823 case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
7824 case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
7825 case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
7826 case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
7827 case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
7828 case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
7829 case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
7830 case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
7831 case X86::VPORDZ128rr: case X86::VPORDZ128rm:
7832 case X86::VPORDZ256rr: case X86::VPORDZ256rm:
7833 case X86::VPORQZ128rr: case X86::VPORQZ128rm:
7834 case X86::VPORQZ256rr: case X86::VPORQZ256rm:
7835 case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
7836 case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
7837 case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
7838 case X86::VPXORQZ256rr: case X86::VPXORQZ256rm:
7839 // If we don't have DQI see if we can still switch from an EVEX integer
7840 // instruction to a VEX floating point instruction.
7841 if (Subtarget.hasDQI())
7842 return 0;
7843
7844 if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
7845 return 0;
7846 if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
7847 return 0;
7848 // Register forms will have 3 operands. Memory form will have more.
7849 if (NumOperands == 3 &&
7850 RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
7851 return 0;
7852
7853 // All domains are valid.
7854 return 0xe;
7855 case X86::MOVHLPSrr:
7856 // We can swap domains when both inputs are the same register.
7857 // FIXME: This doesn't catch all the cases we would like. If the input
7858 // register isn't KILLed by the instruction, the two address instruction
7859 // pass puts a COPY on one input. The other input uses the original
7860 // register. This prevents the same physical register from being used by
7861 // both inputs.
7862 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
7863 MI.getOperand(0).getSubReg() == 0 &&
7864 MI.getOperand(1).getSubReg() == 0 &&
7865 MI.getOperand(2).getSubReg() == 0)
7866 return 0x6;
7867 return 0;
7868 case X86::SHUFPDrri:
7869 return 0x6;
7870 }
7871 return 0;
7872}
7873
7874bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
7875 unsigned Domain) const {
7876  assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
7877 uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
7878  assert(dom && "Not an SSE instruction");
7879
7880 unsigned Opcode = MI.getOpcode();
7881 unsigned NumOperands = MI.getDesc().getNumOperands();
7882
7883 auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
7884 if (MI.getOperand(NumOperands - 1).isImm()) {
7885 unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
7886 Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
7887 unsigned NewImm = Imm;
7888
7889 const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
7890 if (!table)
7891 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
7892
7893 if (Domain == 1) { // PackedSingle
7894 AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
7895 } else if (Domain == 2) { // PackedDouble
7896 AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
7897 } else if (Domain == 3) { // PackedInt
7898 if (Subtarget.hasAVX2()) {
7899 // If we are already VPBLENDW use that, else use VPBLENDD.
7900 if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
7901 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
7902 AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
7903 }
7904 } else {
7905          assert(!Is256 && "128-bit vector expected");
7906 AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
7907 }
7908 }
7909
7910      assert(table && table[Domain - 1] && "Unknown domain op");
7911 MI.setDesc(get(table[Domain - 1]));
7912 MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
7913 }
7914 return true;
7915 };
7916
7917 switch (Opcode) {
7918 case X86::BLENDPDrmi:
7919 case X86::BLENDPDrri:
7920 case X86::VBLENDPDrmi:
7921 case X86::VBLENDPDrri:
7922 return SetBlendDomain(2, false);
7923 case X86::VBLENDPDYrmi:
7924 case X86::VBLENDPDYrri:
7925 return SetBlendDomain(4, true);
7926 case X86::BLENDPSrmi:
7927 case X86::BLENDPSrri:
7928 case X86::VBLENDPSrmi:
7929 case X86::VBLENDPSrri:
7930 case X86::VPBLENDDrmi:
7931 case X86::VPBLENDDrri:
7932 return SetBlendDomain(4, false);
7933 case X86::VBLENDPSYrmi:
7934 case X86::VBLENDPSYrri:
7935 case X86::VPBLENDDYrmi:
7936 case X86::VPBLENDDYrri:
7937 return SetBlendDomain(8, true);
7938 case X86::PBLENDWrmi:
7939 case X86::PBLENDWrri:
7940 case X86::VPBLENDWrmi:
7941 case X86::VPBLENDWrri:
7942 return SetBlendDomain(8, false);
7943 case X86::VPBLENDWYrmi:
7944 case X86::VPBLENDWYrri:
7945 return SetBlendDomain(16, true);
7946 case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
7947 case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
7948 case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
7949 case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
7950 case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
7951 case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
7952 case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
7953 case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
7954 case X86::VPORDZ128rr: case X86::VPORDZ128rm:
7955 case X86::VPORDZ256rr: case X86::VPORDZ256rm:
7956 case X86::VPORQZ128rr: case X86::VPORQZ128rm:
7957 case X86::VPORQZ256rr: case X86::VPORQZ256rm:
7958 case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
7959 case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
7960 case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
7961 case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: {
7962 // Without DQI, convert EVEX instructions to VEX instructions.
7963 if (Subtarget.hasDQI())
7964 return false;
7965
7966 const uint16_t *table = lookupAVX512(MI.getOpcode(), dom,
7967 ReplaceableCustomAVX512LogicInstrs);
7968    assert(table && "Instruction not found in table?");
7969 // Don't change integer Q instructions to D instructions and
7970    // use D instructions if we started with a PS instruction.
7971 if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
7972 Domain = 4;
7973 MI.setDesc(get(table[Domain - 1]));
7974 return true;
7975 }
7976 case X86::UNPCKHPDrr:
7977 case X86::MOVHLPSrr:
7978 // We just need to commute the instruction which will switch the domains.
7979 if (Domain != dom && Domain != 3 &&
7980 MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
7981 MI.getOperand(0).getSubReg() == 0 &&
7982 MI.getOperand(1).getSubReg() == 0 &&
7983 MI.getOperand(2).getSubReg() == 0) {
7984 commuteInstruction(MI, false);
7985 return true;
7986 }
7987 // We must always return true for MOVHLPSrr.
7988 if (Opcode == X86::MOVHLPSrr)
7989 return true;
7990 break;
7991 case X86::SHUFPDrri: {
7992 if (Domain == 1) {
7993 unsigned Imm = MI.getOperand(3).getImm();
7994 unsigned NewImm = 0x44;
7995 if (Imm & 1) NewImm |= 0x0a;
7996 if (Imm & 2) NewImm |= 0xa0;
7997 MI.getOperand(3).setImm(NewImm);
7998 MI.setDesc(get(X86::SHUFPSrri));
7999 }
8000 return true;
8001 }
8002 }
8003 return false;
8004}
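// Worked example for the SHUFPDrri -> SHUFPSrri rewrite above (illustrative):
// the base immediate 0x44 (selectors 01 00 01 00) picks floats {0,1} from each
// source, i.e. the low double of src1 and of src2, matching SHUFPD imm = 0.
// Setting bit 0 ORs in 0x0a so the low pair becomes floats {2,3} of src1 (its
// high double); setting bit 1 ORs in 0xa0 to do the same for the src2 half.
// So SHUFPD imm = 3 becomes SHUFPS imm = 0xee.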
8005
8006std::pair<uint16_t, uint16_t>
8007X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
8008 uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
8009 unsigned opcode = MI.getOpcode();
8010 uint16_t validDomains = 0;
8011 if (domain) {
8012 // Attempt to match for custom instructions.
8013 validDomains = getExecutionDomainCustom(MI);
8014 if (validDomains)
8015 return std::make_pair(domain, validDomains);
8016
8017 if (lookup(opcode, domain, ReplaceableInstrs)) {
8018 validDomains = 0xe;
8019 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
8020 validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
8021 } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
8022 validDomains = 0x6;
8023 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
8024      // Insert/extract instructions should only affect the domain if AVX2
8025 // is enabled.
8026 if (!Subtarget.hasAVX2())
8027 return std::make_pair(0, 0);
8028 validDomains = 0xe;
8029 } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
8030 validDomains = 0xe;
8031 } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
8032 ReplaceableInstrsAVX512DQ)) {
8033 validDomains = 0xe;
8034 } else if (Subtarget.hasDQI()) {
8035 if (const uint16_t *table = lookupAVX512(opcode, domain,
8036 ReplaceableInstrsAVX512DQMasked)) {
8037 if (domain == 1 || (domain == 3 && table[3] == opcode))
8038 validDomains = 0xa;
8039 else
8040 validDomains = 0xc;
8041 }
8042 }
8043 }
8044 return std::make_pair(domain, validDomains);
8045}
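// The validDomains mask above uses one bit per domain number: 0x2 is
// PackedSingle, 0x4 PackedDouble and 0x8 PackedInt, matching the numbering
// consumed by lookup(). Hence 0xe offers all three domains, 0x6 restricts the
// choice to the two floating point domains, and the masked AVX-512DQ entries
// pair one floating point domain with the integer domain (0xa for
// PackedSingle, 0xc for PackedDouble).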
8046
8047void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
8048  assert(Domain>0 && Domain<4 && "Invalid execution domain");
8049 uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
8050  assert(dom && "Not an SSE instruction");
8051
8052 // Attempt to match for custom instructions.
8053 if (setExecutionDomainCustom(MI, Domain))
8054 return;
8055
8056 const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
8057 if (!table) { // try the other table
8058    assert((Subtarget.hasAVX2() || Domain < 3) &&
8059           "256-bit vector operations only available in AVX2");
8060 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
8061 }
8062 if (!table) { // try the FP table
8063 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
8064    assert((!table || Domain < 3) &&
8065           "Can only select PackedSingle or PackedDouble");
8066 }
8067 if (!table) { // try the other table
8068    assert(Subtarget.hasAVX2() &&
8069           "256-bit insert/extract only available in AVX2");
8070 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
8071 }
8072 if (!table) { // try the AVX512 table
8073    assert(Subtarget.hasAVX512() && "Requires AVX-512");
8074 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
8075 // Don't change integer Q instructions to D instructions.
8076 if (table && Domain == 3 && table[3] == MI.getOpcode())
8077 Domain = 4;
8078 }
8079 if (!table) { // try the AVX512DQ table
8080    assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
8081 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
8082 // Don't change integer Q instructions to D instructions and
8083 // use D instructions if we started with a PS instruction.
8084 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
8085 Domain = 4;
8086 }
8087 if (!table) { // try the AVX512DQMasked table
8088    assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
8089 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
8090 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
8091 Domain = 4;
8092 }
8093 assert(table && "Cannot change domain");
8094 MI.setDesc(get(table[Domain - 1]));
8095}
8096
8097/// Return the noop instruction to use for a noop.
8098MCInst X86InstrInfo::getNop() const {
8099 MCInst Nop;
8100 Nop.setOpcode(X86::NOOP);
8101 return Nop;
8102}
8103
8104bool X86InstrInfo::isHighLatencyDef(int opc) const {
8105 switch (opc) {
8106 default: return false;
8107 case X86::DIVPDrm:
8108 case X86::DIVPDrr:
8109 case X86::DIVPSrm:
8110 case X86::DIVPSrr:
8111 case X86::DIVSDrm:
8112 case X86::DIVSDrm_Int:
8113 case X86::DIVSDrr:
8114 case X86::DIVSDrr_Int:
8115 case X86::DIVSSrm:
8116 case X86::DIVSSrm_Int:
8117 case X86::DIVSSrr:
8118 case X86::DIVSSrr_Int:
8119 case X86::SQRTPDm:
8120 case X86::SQRTPDr:
8121 case X86::SQRTPSm:
8122 case X86::SQRTPSr:
8123 case X86::SQRTSDm:
8124 case X86::SQRTSDm_Int:
8125 case X86::SQRTSDr:
8126 case X86::SQRTSDr_Int:
8127 case X86::SQRTSSm:
8128 case X86::SQRTSSm_Int:
8129 case X86::SQRTSSr:
8130 case X86::SQRTSSr_Int:
8131 // AVX instructions with high latency
8132 case X86::VDIVPDrm:
8133 case X86::VDIVPDrr:
8134 case X86::VDIVPDYrm:
8135 case X86::VDIVPDYrr:
8136 case X86::VDIVPSrm:
8137 case X86::VDIVPSrr:
8138 case X86::VDIVPSYrm:
8139 case X86::VDIVPSYrr:
8140 case X86::VDIVSDrm:
8141 case X86::VDIVSDrm_Int:
8142 case X86::VDIVSDrr:
8143 case X86::VDIVSDrr_Int:
8144 case X86::VDIVSSrm:
8145 case X86::VDIVSSrm_Int:
8146 case X86::VDIVSSrr:
8147 case X86::VDIVSSrr_Int:
8148 case X86::VSQRTPDm:
8149 case X86::VSQRTPDr:
8150 case X86::VSQRTPDYm:
8151 case X86::VSQRTPDYr:
8152 case X86::VSQRTPSm:
8153 case X86::VSQRTPSr:
8154 case X86::VSQRTPSYm:
8155 case X86::VSQRTPSYr:
8156 case X86::VSQRTSDm:
8157 case X86::VSQRTSDm_Int:
8158 case X86::VSQRTSDr:
8159 case X86::VSQRTSDr_Int:
8160 case X86::VSQRTSSm:
8161 case X86::VSQRTSSm_Int:
8162 case X86::VSQRTSSr:
8163 case X86::VSQRTSSr_Int:
8164 // AVX512 instructions with high latency
8165 case X86::VDIVPDZ128rm:
8166 case X86::VDIVPDZ128rmb:
8167 case X86::VDIVPDZ128rmbk:
8168 case X86::VDIVPDZ128rmbkz:
8169 case X86::VDIVPDZ128rmk:
8170 case X86::VDIVPDZ128rmkz:
8171 case X86::VDIVPDZ128rr:
8172 case X86::VDIVPDZ128rrk:
8173 case X86::VDIVPDZ128rrkz:
8174 case X86::VDIVPDZ256rm:
8175 case X86::VDIVPDZ256rmb:
8176 case X86::VDIVPDZ256rmbk:
8177 case X86::VDIVPDZ256rmbkz:
8178 case X86::VDIVPDZ256rmk:
8179 case X86::VDIVPDZ256rmkz:
8180 case X86::VDIVPDZ256rr:
8181 case X86::VDIVPDZ256rrk:
8182 case X86::VDIVPDZ256rrkz:
8183 case X86::VDIVPDZrrb:
8184 case X86::VDIVPDZrrbk:
8185 case X86::VDIVPDZrrbkz:
8186 case X86::VDIVPDZrm:
8187 case X86::VDIVPDZrmb:
8188 case X86::VDIVPDZrmbk:
8189 case X86::VDIVPDZrmbkz:
8190 case X86::VDIVPDZrmk:
8191 case X86::VDIVPDZrmkz:
8192 case X86::VDIVPDZrr:
8193 case X86::VDIVPDZrrk:
8194 case X86::VDIVPDZrrkz:
8195 case X86::VDIVPSZ128rm:
8196 case X86::VDIVPSZ128rmb:
8197 case X86::VDIVPSZ128rmbk:
8198 case X86::VDIVPSZ128rmbkz:
8199 case X86::VDIVPSZ128rmk:
8200 case X86::VDIVPSZ128rmkz:
8201 case X86::VDIVPSZ128rr:
8202 case X86::VDIVPSZ128rrk:
8203 case X86::VDIVPSZ128rrkz:
8204 case X86::VDIVPSZ256rm:
8205 case X86::VDIVPSZ256rmb:
8206 case X86::VDIVPSZ256rmbk:
8207 case X86::VDIVPSZ256rmbkz:
8208 case X86::VDIVPSZ256rmk:
8209 case X86::VDIVPSZ256rmkz:
8210 case X86::VDIVPSZ256rr:
8211 case X86::VDIVPSZ256rrk:
8212 case X86::VDIVPSZ256rrkz:
8213 case X86::VDIVPSZrrb:
8214 case X86::VDIVPSZrrbk:
8215 case X86::VDIVPSZrrbkz:
8216 case X86::VDIVPSZrm:
8217 case X86::VDIVPSZrmb:
8218 case X86::VDIVPSZrmbk:
8219 case X86::VDIVPSZrmbkz:
8220 case X86::VDIVPSZrmk:
8221 case X86::VDIVPSZrmkz:
8222 case X86::VDIVPSZrr:
8223 case X86::VDIVPSZrrk:
8224 case X86::VDIVPSZrrkz:
8225 case X86::VDIVSDZrm:
8226 case X86::VDIVSDZrr:
8227 case X86::VDIVSDZrm_Int:
8228 case X86::VDIVSDZrm_Intk:
8229 case X86::VDIVSDZrm_Intkz:
8230 case X86::VDIVSDZrr_Int:
8231 case X86::VDIVSDZrr_Intk:
8232 case X86::VDIVSDZrr_Intkz:
8233 case X86::VDIVSDZrrb_Int:
8234 case X86::VDIVSDZrrb_Intk:
8235 case X86::VDIVSDZrrb_Intkz:
8236 case X86::VDIVSSZrm:
8237 case X86::VDIVSSZrr:
8238 case X86::VDIVSSZrm_Int:
8239 case X86::VDIVSSZrm_Intk:
8240 case X86::VDIVSSZrm_Intkz:
8241 case X86::VDIVSSZrr_Int:
8242 case X86::VDIVSSZrr_Intk:
8243 case X86::VDIVSSZrr_Intkz:
8244 case X86::VDIVSSZrrb_Int:
8245 case X86::VDIVSSZrrb_Intk:
8246 case X86::VDIVSSZrrb_Intkz:
8247 case X86::VSQRTPDZ128m:
8248 case X86::VSQRTPDZ128mb:
8249 case X86::VSQRTPDZ128mbk:
8250 case X86::VSQRTPDZ128mbkz:
8251 case X86::VSQRTPDZ128mk:
8252 case X86::VSQRTPDZ128mkz:
8253 case X86::VSQRTPDZ128r:
8254 case X86::VSQRTPDZ128rk:
8255 case X86::VSQRTPDZ128rkz:
8256 case X86::VSQRTPDZ256m:
8257 case X86::VSQRTPDZ256mb:
8258 case X86::VSQRTPDZ256mbk:
8259 case X86::VSQRTPDZ256mbkz:
8260 case X86::VSQRTPDZ256mk:
8261 case X86::VSQRTPDZ256mkz:
8262 case X86::VSQRTPDZ256r:
8263 case X86::VSQRTPDZ256rk:
8264 case X86::VSQRTPDZ256rkz:
8265 case X86::VSQRTPDZm:
8266 case X86::VSQRTPDZmb:
8267 case X86::VSQRTPDZmbk:
8268 case X86::VSQRTPDZmbkz:
8269 case X86::VSQRTPDZmk:
8270 case X86::VSQRTPDZmkz:
8271 case X86::VSQRTPDZr:
8272 case X86::VSQRTPDZrb:
8273 case X86::VSQRTPDZrbk:
8274 case X86::VSQRTPDZrbkz:
8275 case X86::VSQRTPDZrk:
8276 case X86::VSQRTPDZrkz:
8277 case X86::VSQRTPSZ128m:
8278 case X86::VSQRTPSZ128mb:
8279 case X86::VSQRTPSZ128mbk:
8280 case X86::VSQRTPSZ128mbkz:
8281 case X86::VSQRTPSZ128mk:
8282 case X86::VSQRTPSZ128mkz:
8283 case X86::VSQRTPSZ128r:
8284 case X86::VSQRTPSZ128rk:
8285 case X86::VSQRTPSZ128rkz:
8286 case X86::VSQRTPSZ256m:
8287 case X86::VSQRTPSZ256mb:
8288 case X86::VSQRTPSZ256mbk:
8289 case X86::VSQRTPSZ256mbkz:
8290 case X86::VSQRTPSZ256mk:
8291 case X86::VSQRTPSZ256mkz:
8292 case X86::VSQRTPSZ256r:
8293 case X86::VSQRTPSZ256rk:
8294 case X86::VSQRTPSZ256rkz:
8295 case X86::VSQRTPSZm:
8296 case X86::VSQRTPSZmb:
8297 case X86::VSQRTPSZmbk:
8298 case X86::VSQRTPSZmbkz:
8299 case X86::VSQRTPSZmk:
8300 case X86::VSQRTPSZmkz:
8301 case X86::VSQRTPSZr:
8302 case X86::VSQRTPSZrb:
8303 case X86::VSQRTPSZrbk:
8304 case X86::VSQRTPSZrbkz:
8305 case X86::VSQRTPSZrk:
8306 case X86::VSQRTPSZrkz:
8307 case X86::VSQRTSDZm:
8308 case X86::VSQRTSDZm_Int:
8309 case X86::VSQRTSDZm_Intk:
8310 case X86::VSQRTSDZm_Intkz:
8311 case X86::VSQRTSDZr:
8312 case X86::VSQRTSDZr_Int:
8313 case X86::VSQRTSDZr_Intk:
8314 case X86::VSQRTSDZr_Intkz:
8315 case X86::VSQRTSDZrb_Int:
8316 case X86::VSQRTSDZrb_Intk:
8317 case X86::VSQRTSDZrb_Intkz:
8318 case X86::VSQRTSSZm:
8319 case X86::VSQRTSSZm_Int:
8320 case X86::VSQRTSSZm_Intk:
8321 case X86::VSQRTSSZm_Intkz:
8322 case X86::VSQRTSSZr:
8323 case X86::VSQRTSSZr_Int:
8324 case X86::VSQRTSSZr_Intk:
8325 case X86::VSQRTSSZr_Intkz:
8326 case X86::VSQRTSSZrb_Int:
8327 case X86::VSQRTSSZrb_Intk:
8328 case X86::VSQRTSSZrb_Intkz:
8329
8330 case X86::VGATHERDPDYrm:
8331 case X86::VGATHERDPDZ128rm:
8332 case X86::VGATHERDPDZ256rm:
8333 case X86::VGATHERDPDZrm:
8334 case X86::VGATHERDPDrm:
8335 case X86::VGATHERDPSYrm:
8336 case X86::VGATHERDPSZ128rm:
8337 case X86::VGATHERDPSZ256rm:
8338 case X86::VGATHERDPSZrm:
8339 case X86::VGATHERDPSrm:
8340 case X86::VGATHERPF0DPDm:
8341 case X86::VGATHERPF0DPSm:
8342 case X86::VGATHERPF0QPDm:
8343 case X86::VGATHERPF0QPSm:
8344 case X86::VGATHERPF1DPDm:
8345 case X86::VGATHERPF1DPSm:
8346 case X86::VGATHERPF1QPDm:
8347 case X86::VGATHERPF1QPSm:
8348 case X86::VGATHERQPDYrm:
8349 case X86::VGATHERQPDZ128rm:
8350 case X86::VGATHERQPDZ256rm:
8351 case X86::VGATHERQPDZrm:
8352 case X86::VGATHERQPDrm:
8353 case X86::VGATHERQPSYrm:
8354 case X86::VGATHERQPSZ128rm:
8355 case X86::VGATHERQPSZ256rm:
8356 case X86::VGATHERQPSZrm:
8357 case X86::VGATHERQPSrm:
8358 case X86::VPGATHERDDYrm:
8359 case X86::VPGATHERDDZ128rm:
8360 case X86::VPGATHERDDZ256rm:
8361 case X86::VPGATHERDDZrm:
8362 case X86::VPGATHERDDrm:
8363 case X86::VPGATHERDQYrm:
8364 case X86::VPGATHERDQZ128rm:
8365 case X86::VPGATHERDQZ256rm:
8366 case X86::VPGATHERDQZrm:
8367 case X86::VPGATHERDQrm:
8368 case X86::VPGATHERQDYrm:
8369 case X86::VPGATHERQDZ128rm:
8370 case X86::VPGATHERQDZ256rm:
8371 case X86::VPGATHERQDZrm:
8372 case X86::VPGATHERQDrm:
8373 case X86::VPGATHERQQYrm:
8374 case X86::VPGATHERQQZ128rm:
8375 case X86::VPGATHERQQZ256rm:
8376 case X86::VPGATHERQQZrm:
8377 case X86::VPGATHERQQrm:
8378 case X86::VSCATTERDPDZ128mr:
8379 case X86::VSCATTERDPDZ256mr:
8380 case X86::VSCATTERDPDZmr:
8381 case X86::VSCATTERDPSZ128mr:
8382 case X86::VSCATTERDPSZ256mr:
8383 case X86::VSCATTERDPSZmr:
8384 case X86::VSCATTERPF0DPDm:
8385 case X86::VSCATTERPF0DPSm:
8386 case X86::VSCATTERPF0QPDm:
8387 case X86::VSCATTERPF0QPSm:
8388 case X86::VSCATTERPF1DPDm:
8389 case X86::VSCATTERPF1DPSm:
8390 case X86::VSCATTERPF1QPDm:
8391 case X86::VSCATTERPF1QPSm:
8392 case X86::VSCATTERQPDZ128mr:
8393 case X86::VSCATTERQPDZ256mr:
8394 case X86::VSCATTERQPDZmr:
8395 case X86::VSCATTERQPSZ128mr:
8396 case X86::VSCATTERQPSZ256mr:
8397 case X86::VSCATTERQPSZmr:
8398 case X86::VPSCATTERDDZ128mr:
8399 case X86::VPSCATTERDDZ256mr:
8400 case X86::VPSCATTERDDZmr:
8401 case X86::VPSCATTERDQZ128mr:
8402 case X86::VPSCATTERDQZ256mr:
8403 case X86::VPSCATTERDQZmr:
8404 case X86::VPSCATTERQDZ128mr:
8405 case X86::VPSCATTERQDZ256mr:
8406 case X86::VPSCATTERQDZmr:
8407 case X86::VPSCATTERQQZ128mr:
8408 case X86::VPSCATTERQQZ256mr:
8409 case X86::VPSCATTERQQZmr:
8410 return true;
8411 }
8412}
8413
8414bool X86InstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
8415 const MachineRegisterInfo *MRI,
8416 const MachineInstr &DefMI,
8417 unsigned DefIdx,
8418 const MachineInstr &UseMI,
8419 unsigned UseIdx) const {
8420 return isHighLatencyDef(DefMI.getOpcode());
8421}
8422
8423bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
8424 const MachineBasicBlock *MBB) const {
8425 assert(Inst.getNumExplicitOperands() == 3 && Inst.getNumExplicitDefs() == 1 &&
8426 Inst.getNumDefs() <= 2 && "Reassociation needs binary operators");
8427
8428 // Integer binary math/logic instructions have a third source operand:
8429 // the EFLAGS register. That operand must be both defined here and never
8430 // used; i.e., it must be dead. If the EFLAGS operand is live, then we
8431 // cannot change anything because rearranging the operands could affect other
8432 // instructions that depend on the exact status flags (zero, sign, etc.)
8433 // that are set by using these particular operands with this operation.
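// As a rough illustration (hypothetical MIR, not taken from this file): an add
// such as
//   %2:gr32 = ADD32rr %0, %1, implicit-def dead $eflags
// is a reassociation candidate, whereas the same instruction with a live
// implicit-def $eflags has to be left untouched.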
8434 const MachineOperand *FlagDef = Inst.findRegisterDefOperand(X86::EFLAGS);
8435 assert((Inst.getNumDefs() == 1 || FlagDef) &&
8436 "Implicit def isn't flags?");
8437 if (FlagDef && !FlagDef->isDead())
8438 return false;
8439
8440 return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
8441}
8442
8443// TODO: There are many more machine instruction opcodes to match:
8444// 1. Other data types (integer, vectors)
8445// 2. Other math / logic operations (xor, or)
8446// 3. Other forms of the same operation (intrinsics and other variants)
8447bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
8448 switch (Inst.getOpcode()) {
8449 case X86::AND8rr:
8450 case X86::AND16rr:
8451 case X86::AND32rr:
8452 case X86::AND64rr:
8453 case X86::OR8rr:
8454 case X86::OR16rr:
8455 case X86::OR32rr:
8456 case X86::OR64rr:
8457 case X86::XOR8rr:
8458 case X86::XOR16rr:
8459 case X86::XOR32rr:
8460 case X86::XOR64rr:
8461 case X86::IMUL16rr:
8462 case X86::IMUL32rr:
8463 case X86::IMUL64rr:
8464 case X86::PANDrr:
8465 case X86::PORrr:
8466 case X86::PXORrr:
8467 case X86::ANDPDrr:
8468 case X86::ANDPSrr:
8469 case X86::ORPDrr:
8470 case X86::ORPSrr:
8471 case X86::XORPDrr:
8472 case X86::XORPSrr:
8473 case X86::PADDBrr:
8474 case X86::PADDWrr:
8475 case X86::PADDDrr:
8476 case X86::PADDQrr:
8477 case X86::PMULLWrr:
8478 case X86::PMULLDrr:
8479 case X86::PMAXSBrr:
8480 case X86::PMAXSDrr:
8481 case X86::PMAXSWrr:
8482 case X86::PMAXUBrr:
8483 case X86::PMAXUDrr:
8484 case X86::PMAXUWrr:
8485 case X86::PMINSBrr:
8486 case X86::PMINSDrr:
8487 case X86::PMINSWrr:
8488 case X86::PMINUBrr:
8489 case X86::PMINUDrr:
8490 case X86::PMINUWrr:
8491 case X86::VPANDrr:
8492 case X86::VPANDYrr:
8493 case X86::VPANDDZ128rr:
8494 case X86::VPANDDZ256rr:
8495 case X86::VPANDDZrr:
8496 case X86::VPANDQZ128rr:
8497 case X86::VPANDQZ256rr:
8498 case X86::VPANDQZrr:
8499 case X86::VPORrr:
8500 case X86::VPORYrr:
8501 case X86::VPORDZ128rr:
8502 case X86::VPORDZ256rr:
8503 case X86::VPORDZrr:
8504 case X86::VPORQZ128rr:
8505 case X86::VPORQZ256rr:
8506 case X86::VPORQZrr:
8507 case X86::VPXORrr:
8508 case X86::VPXORYrr:
8509 case X86::VPXORDZ128rr:
8510 case X86::VPXORDZ256rr:
8511 case X86::VPXORDZrr:
8512 case X86::VPXORQZ128rr:
8513 case X86::VPXORQZ256rr:
8514 case X86::VPXORQZrr:
8515 case X86::VANDPDrr:
8516 case X86::VANDPSrr:
8517 case X86::VANDPDYrr:
8518 case X86::VANDPSYrr:
8519 case X86::VANDPDZ128rr:
8520 case X86::VANDPSZ128rr:
8521 case X86::VANDPDZ256rr:
8522 case X86::VANDPSZ256rr:
8523 case X86::VANDPDZrr:
8524 case X86::VANDPSZrr:
8525 case X86::VORPDrr:
8526 case X86::VORPSrr:
8527 case X86::VORPDYrr:
8528 case X86::VORPSYrr:
8529 case X86::VORPDZ128rr:
8530 case X86::VORPSZ128rr:
8531 case X86::VORPDZ256rr:
8532 case X86::VORPSZ256rr:
8533 case X86::VORPDZrr:
8534 case X86::VORPSZrr:
8535 case X86::VXORPDrr:
8536 case X86::VXORPSrr:
8537 case X86::VXORPDYrr:
8538 case X86::VXORPSYrr:
8539 case X86::VXORPDZ128rr:
8540 case X86::VXORPSZ128rr:
8541 case X86::VXORPDZ256rr:
8542 case X86::VXORPSZ256rr:
8543 case X86::VXORPDZrr:
8544 case X86::VXORPSZrr:
8545 case X86::KADDBrr:
8546 case X86::KADDWrr:
8547 case X86::KADDDrr:
8548 case X86::KADDQrr:
8549 case X86::KANDBrr:
8550 case X86::KANDWrr:
8551 case X86::KANDDrr:
8552 case X86::KANDQrr:
8553 case X86::KORBrr:
8554 case X86::KORWrr:
8555 case X86::KORDrr:
8556 case X86::KORQrr:
8557 case X86::KXORBrr:
8558 case X86::KXORWrr:
8559 case X86::KXORDrr:
8560 case X86::KXORQrr:
8561 case X86::VPADDBrr:
8562 case X86::VPADDWrr:
8563 case X86::VPADDDrr:
8564 case X86::VPADDQrr:
8565 case X86::VPADDBYrr:
8566 case X86::VPADDWYrr:
8567 case X86::VPADDDYrr:
8568 case X86::VPADDQYrr:
8569 case X86::VPADDBZ128rr:
8570 case X86::VPADDWZ128rr:
8571 case X86::VPADDDZ128rr:
8572 case X86::VPADDQZ128rr:
8573 case X86::VPADDBZ256rr:
8574 case X86::VPADDWZ256rr:
8575 case X86::VPADDDZ256rr:
8576 case X86::VPADDQZ256rr:
8577 case X86::VPADDBZrr:
8578 case X86::VPADDWZrr:
8579 case X86::VPADDDZrr:
8580 case X86::VPADDQZrr:
8581 case X86::VPMULLWrr:
8582 case X86::VPMULLWYrr:
8583 case X86::VPMULLWZ128rr:
8584 case X86::VPMULLWZ256rr:
8585 case X86::VPMULLWZrr:
8586 case X86::VPMULLDrr:
8587 case X86::VPMULLDYrr:
8588 case X86::VPMULLDZ128rr:
8589 case X86::VPMULLDZ256rr:
8590 case X86::VPMULLDZrr:
8591 case X86::VPMULLQZ128rr:
8592 case X86::VPMULLQZ256rr:
8593 case X86::VPMULLQZrr:
8594 case X86::VPMAXSBrr:
8595 case X86::VPMAXSBYrr:
8596 case X86::VPMAXSBZ128rr:
8597 case X86::VPMAXSBZ256rr:
8598 case X86::VPMAXSBZrr:
8599 case X86::VPMAXSDrr:
8600 case X86::VPMAXSDYrr:
8601 case X86::VPMAXSDZ128rr:
8602 case X86::VPMAXSDZ256rr:
8603 case X86::VPMAXSDZrr:
8604 case X86::VPMAXSQZ128rr:
8605 case X86::VPMAXSQZ256rr:
8606 case X86::VPMAXSQZrr:
8607 case X86::VPMAXSWrr:
8608 case X86::VPMAXSWYrr:
8609 case X86::VPMAXSWZ128rr:
8610 case X86::VPMAXSWZ256rr:
8611 case X86::VPMAXSWZrr:
8612 case X86::VPMAXUBrr:
8613 case X86::VPMAXUBYrr:
8614 case X86::VPMAXUBZ128rr:
8615 case X86::VPMAXUBZ256rr:
8616 case X86::VPMAXUBZrr:
8617 case X86::VPMAXUDrr:
8618 case X86::VPMAXUDYrr:
8619 case X86::VPMAXUDZ128rr:
8620 case X86::VPMAXUDZ256rr:
8621 case X86::VPMAXUDZrr:
8622 case X86::VPMAXUQZ128rr:
8623 case X86::VPMAXUQZ256rr:
8624 case X86::VPMAXUQZrr:
8625 case X86::VPMAXUWrr:
8626 case X86::VPMAXUWYrr:
8627 case X86::VPMAXUWZ128rr:
8628 case X86::VPMAXUWZ256rr:
8629 case X86::VPMAXUWZrr:
8630 case X86::VPMINSBrr:
8631 case X86::VPMINSBYrr:
8632 case X86::VPMINSBZ128rr:
8633 case X86::VPMINSBZ256rr:
8634 case X86::VPMINSBZrr:
8635 case X86::VPMINSDrr:
8636 case X86::VPMINSDYrr:
8637 case X86::VPMINSDZ128rr:
8638 case X86::VPMINSDZ256rr:
8639 case X86::VPMINSDZrr:
8640 case X86::VPMINSQZ128rr:
8641 case X86::VPMINSQZ256rr:
8642 case X86::VPMINSQZrr:
8643 case X86::VPMINSWrr:
8644 case X86::VPMINSWYrr:
8645 case X86::VPMINSWZ128rr:
8646 case X86::VPMINSWZ256rr:
8647 case X86::VPMINSWZrr:
8648 case X86::VPMINUBrr:
8649 case X86::VPMINUBYrr:
8650 case X86::VPMINUBZ128rr:
8651 case X86::VPMINUBZ256rr:
8652 case X86::VPMINUBZrr:
8653 case X86::VPMINUDrr:
8654 case X86::VPMINUDYrr:
8655 case X86::VPMINUDZ128rr:
8656 case X86::VPMINUDZ256rr:
8657 case X86::VPMINUDZrr:
8658 case X86::VPMINUQZ128rr:
8659 case X86::VPMINUQZ256rr:
8660 case X86::VPMINUQZrr:
8661 case X86::VPMINUWrr:
8662 case X86::VPMINUWYrr:
8663 case X86::VPMINUWZ128rr:
8664 case X86::VPMINUWZ256rr:
8665 case X86::VPMINUWZrr:
8666 // Normal min/max instructions are not commutative because of NaN and signed
8667 // zero semantics, but these are. Thus, there's no need to check for global
8668 // relaxed math; the instructions themselves have the properties we need.
8669 case X86::MAXCPDrr:
8670 case X86::MAXCPSrr:
8671 case X86::MAXCSDrr:
8672 case X86::MAXCSSrr:
8673 case X86::MINCPDrr:
8674 case X86::MINCPSrr:
8675 case X86::MINCSDrr:
8676 case X86::MINCSSrr:
8677 case X86::VMAXCPDrr:
8678 case X86::VMAXCPSrr:
8679 case X86::VMAXCPDYrr:
8680 case X86::VMAXCPSYrr:
8681 case X86::VMAXCPDZ128rr:
8682 case X86::VMAXCPSZ128rr:
8683 case X86::VMAXCPDZ256rr:
8684 case X86::VMAXCPSZ256rr:
8685 case X86::VMAXCPDZrr:
8686 case X86::VMAXCPSZrr:
8687 case X86::VMAXCSDrr:
8688 case X86::VMAXCSSrr:
8689 case X86::VMAXCSDZrr:
8690 case X86::VMAXCSSZrr:
8691 case X86::VMINCPDrr:
8692 case X86::VMINCPSrr:
8693 case X86::VMINCPDYrr:
8694 case X86::VMINCPSYrr:
8695 case X86::VMINCPDZ128rr:
8696 case X86::VMINCPSZ128rr:
8697 case X86::VMINCPDZ256rr:
8698 case X86::VMINCPSZ256rr:
8699 case X86::VMINCPDZrr:
8700 case X86::VMINCPSZrr:
8701 case X86::VMINCSDrr:
8702 case X86::VMINCSSrr:
8703 case X86::VMINCSDZrr:
8704 case X86::VMINCSSZrr:
8705 case X86::VMAXCPHZ128rr:
8706 case X86::VMAXCPHZ256rr:
8707 case X86::VMAXCPHZrr:
8708 case X86::VMAXCSHZrr:
8709 case X86::VMINCPHZ128rr:
8710 case X86::VMINCPHZ256rr:
8711 case X86::VMINCPHZrr:
8712 case X86::VMINCSHZrr:
8713 return true;
8714 case X86::ADDPDrr:
8715 case X86::ADDPSrr:
8716 case X86::ADDSDrr:
8717 case X86::ADDSSrr:
8718 case X86::MULPDrr:
8719 case X86::MULPSrr:
8720 case X86::MULSDrr:
8721 case X86::MULSSrr:
8722 case X86::VADDPDrr:
8723 case X86::VADDPSrr:
8724 case X86::VADDPDYrr:
8725 case X86::VADDPSYrr:
8726 case X86::VADDPDZ128rr:
8727 case X86::VADDPSZ128rr:
8728 case X86::VADDPDZ256rr:
8729 case X86::VADDPSZ256rr:
8730 case X86::VADDPDZrr:
8731 case X86::VADDPSZrr:
8732 case X86::VADDSDrr:
8733 case X86::VADDSSrr:
8734 case X86::VADDSDZrr:
8735 case X86::VADDSSZrr:
8736 case X86::VMULPDrr:
8737 case X86::VMULPSrr:
8738 case X86::VMULPDYrr:
8739 case X86::VMULPSYrr:
8740 case X86::VMULPDZ128rr:
8741 case X86::VMULPSZ128rr:
8742 case X86::VMULPDZ256rr:
8743 case X86::VMULPSZ256rr:
8744 case X86::VMULPDZrr:
8745 case X86::VMULPSZrr:
8746 case X86::VMULSDrr:
8747 case X86::VMULSSrr:
8748 case X86::VMULSDZrr:
8749 case X86::VMULSSZrr:
8750 case X86::VADDPHZ128rr:
8751 case X86::VADDPHZ256rr:
8752 case X86::VADDPHZrr:
8753 case X86::VADDSHZrr:
8754 case X86::VMULPHZ128rr:
8755 case X86::VMULPHZ256rr:
8756 case X86::VMULPHZrr:
8757 case X86::VMULSHZrr:
8758 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
8759 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
8760 default:
8761 return false;
8762 }
8763}
8764
8765/// If \p DescribedReg overlaps with the MOVrr instruction's destination
8766/// register then, if possible, describe the value in terms of the source
8767/// register.
8768static Optional<ParamLoadedValue>
8769describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg,
8770 const TargetRegisterInfo *TRI) {
8771 Register DestReg = MI.getOperand(0).getReg();
8772 Register SrcReg = MI.getOperand(1).getReg();
8773
8774 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
8775
8776 // If the described register is the destination, just return the source.
8777 if (DestReg == DescribedReg)
8778 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
8779
8780 // If the described register is a sub-register of the destination register,
8781 // then pick out the source register's corresponding sub-register.
8782 if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) {
8783 Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
8784 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
8785 }
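// For instance (a hypothetical example, assuming the usual GR32/GR16
// sub-register layout): asking to describe $cx across "$ecx = MOV32rr $edx"
// picks out the sub_16bit index and returns $dx with an empty expression.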
8786
8787 // The remaining case to consider is when the described register is a
8788 // super-register of the destination register. MOV8rr and MOV16rr do not
8789 // write to any of the other bytes in the register, meaning that we'd have to
8790 // describe the value using a combination of the source register and the
8791 // non-overlapping bits in the described register, which is not currently
8792 // possible.
8793 if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr ||
8794 !TRI->isSuperRegister(DestReg, DescribedReg))
8795 return None;
8796
8797 assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case");
8798 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
8799}
8800
8801Optional<ParamLoadedValue>
8802X86InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const {
8803 const MachineOperand *Op = nullptr;
8804 DIExpression *Expr = nullptr;
8805
8806 const TargetRegisterInfo *TRI = &getRegisterInfo();
8807
8808 switch (MI.getOpcode()) {
8809 case X86::LEA32r:
8810 case X86::LEA64r:
8811 case X86::LEA64_32r: {
8812 // We may need to describe a 64-bit parameter with a 32-bit LEA.
8813 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
8814 return None;
8815
8816 // Operand 4 could be a global address. For now we do not support
8817 // such a situation.
8818 if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
8819 return None;
8820
8821 const MachineOperand &Op1 = MI.getOperand(1);
8822 const MachineOperand &Op2 = MI.getOperand(3);
8823 assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister ||
8824 Register::isPhysicalRegister(Op2.getReg())));
8825
8826 // Omit situations like:
8827 // %rsi = lea %rsi, 4, ...
8828 if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) ||
8829 Op2.getReg() == MI.getOperand(0).getReg())
8830 return None;
8831 else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister &&
8832 TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) ||
8833 (Op2.getReg() != X86::NoRegister &&
8834 TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg())))
8835 return None;
8836
8837 int64_t Coef = MI.getOperand(2).getImm();
8838 int64_t Offset = MI.getOperand(4).getImm();
8839 SmallVector<uint64_t, 8> Ops;
8840
8841 if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) {
8842 Op = &Op1;
8843 } else if (Op1.isFI())
8844 Op = &Op1;
8845
8846 if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) {
8847 Ops.push_back(dwarf::DW_OP_constu);
8848 Ops.push_back(Coef + 1);
8849 Ops.push_back(dwarf::DW_OP_mul);
8850 } else {
8851 if (Op && Op2.getReg() != X86::NoRegister) {
8852 int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false);
8853 if (dwarfReg < 0)
8854 return None;
8855 else if (dwarfReg < 32) {
8856 Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg);
8857 Ops.push_back(0);
8858 } else {
8859 Ops.push_back(dwarf::DW_OP_bregx);
8860 Ops.push_back(dwarfReg);
8861 Ops.push_back(0);
8862 }
8863 } else if (!Op) {
8864 assert(Op2.getReg() != X86::NoRegister);
8865 Op = &Op2;
8866 }
8867
8868 if (Coef > 1) {
8869 assert(Op2.getReg() != X86::NoRegister);
8870 Ops.push_back(dwarf::DW_OP_constu);
8871 Ops.push_back(Coef);
8872 Ops.push_back(dwarf::DW_OP_mul);
8873 }
8874
8875 if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) &&
8876 Op2.getReg() != X86::NoRegister) {
8877 Ops.push_back(dwarf::DW_OP_plus);
8878 }
8879 }
8880
8881 DIExpression::appendOffset(Ops, Offset);
8882 Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops);
8883
8884 return ParamLoadedValue(*Op, Expr);
8885 }
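// A minimal worked sketch of the LEA case above (hypothetical instruction, and
// assuming the SysV DWARF numbering where $rsi is register 4): for
//   $rax = LEA64r $rdi, 4, $rsi, 8, $noreg   (i.e. rax = rdi + 4*rsi + 8)
// the returned operand is $rdi and the expression built is roughly
//   {DW_OP_breg4 0, DW_OP_constu 4, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst 8}.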
8886 case X86::MOV8ri:
8887 case X86::MOV16ri:
8888 // TODO: Handle MOV8ri and MOV16ri.
8889 return None;
8890 case X86::MOV32ri:
8891 case X86::MOV64ri:
8892 case X86::MOV64ri32:
8893 // MOV32ri may be used for producing zero-extended 32-bit immediates in
8894 // 64-bit parameters, so we need to consider super-registers.
8895 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
8896 return None;
8897 return ParamLoadedValue(MI.getOperand(1), Expr);
8898 case X86::MOV8rr:
8899 case X86::MOV16rr:
8900 case X86::MOV32rr:
8901 case X86::MOV64rr:
8902 return describeMOVrrLoadedValue(MI, Reg, TRI);
8903 case X86::XOR32rr: {
8904 // 64-bit parameters are zero-materialized using XOR32rr, so also consider
8905 // super-registers.
8906 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
8907 return None;
8908 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
8909 return ParamLoadedValue(MachineOperand::CreateImm(0), Expr);
8910 return None;
8911 }
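// Illustrative sketch (hypothetical MIR): for the common zero idiom
//   $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags
// the two source operands match, so both $eax and its super-register $rax are
// described as the immediate 0.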
8912 case X86::MOVSX64rr32: {
8913 // We may need to describe the lower 32 bits of the MOVSX; for example, in
8914 // cases like this:
8915 //
8916 // $ebx = [...]
8917 // $rdi = MOVSX64rr32 $ebx
8918 // $esi = MOV32rr $edi
8919 if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg))
8920 return None;
8921
8922 Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
8923
8924 // If the described register is the destination register we need to
8925 // sign-extend the source register from 32 bits. The other case we handle
8926 // is when the described register is the 32-bit sub-register of the
8927 // destination register, in case we just need to return the source
8928 // register.
8929 if (Reg == MI.getOperand(0).getReg())
8930 Expr = DIExpression::appendExt(Expr, 32, 64, true);
8931 else
8932 assert(X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg) &&
8933 "Unhandled sub-register case for MOVSX64rr32");
8934
8935 return ParamLoadedValue(MI.getOperand(1), Expr);
8936 }
8937 default:
8938 assert(!MI.isMoveImmediate() && "Unexpected MoveImm instruction");
8939 return TargetInstrInfo::describeLoadedValue(MI, Reg);
8940 }
8941}
8942
8943/// This is an architecture-specific helper function of reassociateOps.
8944/// Set special operand attributes for new instructions after reassociation.
8945void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
8946 MachineInstr &OldMI2,
8947 MachineInstr &NewMI1,
8948 MachineInstr &NewMI2) const {
8949 // Propagate FP flags from the original instructions.
8950 // But clear poison-generating flags because those may not be valid now.
8951 // TODO: There should be a helper function for copying only fast-math-flags.
8952 uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
8953 NewMI1.setFlags(IntersectedFlags);
8954 NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
8955 NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
8956 NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
8957
8958 NewMI2.setFlags(IntersectedFlags);
8959 NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
8960 NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
8961 NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
8962
8963 // Integer instructions may define an implicit EFLAGS dest register operand.
8964 MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS);
8965 MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS);
8966
8967 assert(!OldFlagDef1 == !OldFlagDef2 &&
8968 "Unexpected instruction type for reassociation");
8969
8970 if (!OldFlagDef1 || !OldFlagDef2)
8971 return;
8972
8973 assert(OldFlagDef1->isDead() && OldFlagDef2->isDead() &&
8974 "Must have dead EFLAGS operand in reassociable instruction");
8975
8976 MachineOperand *NewFlagDef1 = NewMI1.findRegisterDefOperand(X86::EFLAGS);
8977 MachineOperand *NewFlagDef2 = NewMI2.findRegisterDefOperand(X86::EFLAGS);
8978
8979 assert(NewFlagDef1 && NewFlagDef2 &&
8980 "Unexpected operand in reassociable instruction");
8981
8982 // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
8983 // of this pass or other passes. The EFLAGS operands must be dead in these new
8984 // instructions because the EFLAGS operands in the original instructions must
8985 // be dead in order for reassociation to occur.
8986 NewFlagDef1->setIsDead();
8987 NewFlagDef2->setIsDead();
8988}
8989
8990std::pair<unsigned, unsigned>
8991X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
8992 return std::make_pair(TF, 0u);
8993}
8994
8995ArrayRef<std::pair<unsigned, const char *>>
8996X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
8997 using namespace X86II;
8998 static const std::pair<unsigned, const char *> TargetFlags[] = {
8999 {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
9000 {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
9001 {MO_GOT, "x86-got"},
9002 {MO_GOTOFF, "x86-gotoff"},
9003 {MO_GOTPCREL, "x86-gotpcrel"},
9004 {MO_GOTPCREL_NORELAX, "x86-gotpcrel-norelax"},
9005 {MO_PLT, "x86-plt"},
9006 {MO_TLSGD, "x86-tlsgd"},
9007 {MO_TLSLD, "x86-tlsld"},
9008 {MO_TLSLDM, "x86-tlsldm"},
9009 {MO_GOTTPOFF, "x86-gottpoff"},
9010 {MO_INDNTPOFF, "x86-indntpoff"},
9011 {MO_TPOFF, "x86-tpoff"},
9012 {MO_DTPOFF, "x86-dtpoff"},
9013 {MO_NTPOFF, "x86-ntpoff"},
9014 {MO_GOTNTPOFF, "x86-gotntpoff"},
9015 {MO_DLLIMPORT, "x86-dllimport"},
9016 {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
9017 {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
9018 {MO_TLVP, "x86-tlvp"},
9019 {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
9020 {MO_SECREL, "x86-secrel"},
9021 {MO_COFFSTUB, "x86-coffstub"}};
9022 return makeArrayRef(TargetFlags);
9023}
9024
9025namespace {
9026 /// Create Global Base Reg pass. This initializes the PIC
9027 /// global base register for x86-32.
9028 struct CGBR : public MachineFunctionPass {
9029 static char ID;
9030 CGBR() : MachineFunctionPass(ID) {}
9031
9032 bool runOnMachineFunction(MachineFunction &MF) override {
9033 const X86TargetMachine *TM =
9034 static_cast<const X86TargetMachine *>(&MF.getTarget());
9035 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
9036
9037 // Don't do anything in the 64-bit small and kernel code models. They use
9038 // RIP-relative addressing for everything.
9039 if (STI.is64Bit() && (TM->getCodeModel() == CodeModel::Small ||
9040 TM->getCodeModel() == CodeModel::Kernel))
9041 return false;
9042
9043 // Only emit a global base reg in PIC mode.
9044 if (!TM->isPositionIndependent())
9045 return false;
9046
9047 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
9048 Register GlobalBaseReg = X86FI->getGlobalBaseReg();
9049
9050 // If we didn't need a GlobalBaseReg, don't insert code.
9051 if (GlobalBaseReg == 0)
9052 return false;
9053
9054 // Insert the set of GlobalBaseReg into the first MBB of the function
9055 MachineBasicBlock &FirstMBB = MF.front();
9056 MachineBasicBlock::iterator MBBI = FirstMBB.begin();
9057 DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
9058 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9059 const X86InstrInfo *TII = STI.getInstrInfo();
9060
9061 Register PC;
9062 if (STI.isPICStyleGOT())
9063 PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
9064 else
9065 PC = GlobalBaseReg;
9066
9067 if (STI.is64Bit()) {
9068 if (TM->getCodeModel() == CodeModel::Medium) {
9069 // In the medium code model, use a RIP-relative LEA to materialize the
9070 // GOT.
9071 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
9072 .addReg(X86::RIP)
9073 .addImm(0)
9074 .addReg(0)
9075 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
9076 .addReg(0);
9077 } else if (TM->getCodeModel() == CodeModel::Large) {
9078 // In the large code model, we are aiming for this code, though the
9079 // register allocation may vary:
9080 // leaq .LN$pb(%rip), %rax
9081 // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
9082 // addq %rcx, %rax
9083 // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
9084 Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
9085 Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
9086 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
9087 .addReg(X86::RIP)
9088 .addImm(0)
9089 .addReg(0)
9090 .addSym(MF.getPICBaseSymbol())
9091 .addReg(0);
9092 std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
9093 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
9094 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
9095 X86II::MO_PIC_BASE_OFFSET);
9096 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
9097 .addReg(PBReg, RegState::Kill)
9098 .addReg(GOTReg, RegState::Kill);
9099 } else {
9100 llvm_unreachable("unexpected code model");
9101 }
9102 } else {
9103 // Operand of MovePCtoStack is completely ignored by asm printer. It's
9104 // only used in JIT code emission as displacement to pc.
9105 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
9106
9107 // If we're using vanilla 'GOT' PIC style, we should use relative
9108 // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
9109 if (STI.isPICStyleGOT()) {
9110 // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
9111 // %some_register
9112 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
9113 .addReg(PC)
9114 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
9115 X86II::MO_GOT_ABSOLUTE_ADDRESS);
9116 }
9117 }
9118
9119 return true;
9120 }
9121
9122 StringRef getPassName() const override {
9123 return "X86 PIC Global Base Reg Initialization";
9124 }
9125
9126 void getAnalysisUsage(AnalysisUsage &AU) const override {
9127 AU.setPreservesCFG();
9128 MachineFunctionPass::getAnalysisUsage(AU);
9129 }
9130 };
9131} // namespace
9132
9133char CGBR::ID = 0;
9134FunctionPass*
9135llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
9136
9137namespace {
9138 struct LDTLSCleanup : public MachineFunctionPass {
9139 static char ID;
9140 LDTLSCleanup() : MachineFunctionPass(ID) {}
9141
9142 bool runOnMachineFunction(MachineFunction &MF) override {
9143 if (skipFunction(MF.getFunction()))
9144 return false;
9145
9146 X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
9147 if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
9148 // No point folding accesses if there aren't at least two.
9149 return false;
9150 }
9151
9152 MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
9153 return VisitNode(DT->getRootNode(), 0);
9154 }
9155
9156 // Visit the dominator subtree rooted at Node in pre-order.
9157 // If TLSBaseAddrReg is non-null, then use that to replace any
9158 // TLS_base_addr instructions. Otherwise, create the register
9159 // when the first such instruction is seen, and then use it
9160 // as we encounter more instructions.
9161 bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
9162 MachineBasicBlock *BB = Node->getBlock();
9163 bool Changed = false;
9164
9165 // Traverse the current block.
9166 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
9167 ++I) {
9168 switch (I->getOpcode()) {
9169 case X86::TLS_base_addr32:
9170 case X86::TLS_base_addr64:
9171 if (TLSBaseAddrReg)
9172 I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
9173 else
9174 I = SetRegister(*I, &TLSBaseAddrReg);
9175 Changed = true;
9176 break;
9177 default:
9178 break;
9179 }
9180 }
9181
9182 // Visit the children of this block in the dominator tree.
9183 for (auto I = Node->begin(), E = Node->end(); I != E; ++I) {
9184 Changed |= VisitNode(*I, TLSBaseAddrReg);
9185 }
9186
9187 return Changed;
9188 }
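// Roughly, the rewrite this drives (hypothetical before/after, details elided):
//   before:  $rax = TLS_base_addr64 ...        after:  $rax = TLS_base_addr64 ...
//            ...                                       %tls:gr64 = COPY $rax
//            $rax = TLS_base_addr64 ...                ...
//                                                      $rax = COPY %tls
// The first access keeps its call and seeds the virtual register via SetRegister;
// later accesses are replaced with copies by ReplaceTLSBaseAddrCall below.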
9189
9190 // Replace the TLS_base_addr instruction I with a copy from
9191 // TLSBaseAddrReg, returning the new instruction.
9192 MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
9193 unsigned TLSBaseAddrReg) {
9194 MachineFunction *MF = I.getParent()->getParent();
9195 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
9196 const bool is64Bit = STI.is64Bit();
9197 const X86InstrInfo *TII = STI.getInstrInfo();
9198
9199 // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
9200 MachineInstr *Copy =
9201 BuildMI(*I.getParent(), I, I.getDebugLoc(),
9202 TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
9203 .addReg(TLSBaseAddrReg);
9204
9205 // Erase the TLS_base_addr instruction.
9206 I.eraseFromParent();
9207
9208 return Copy;
9209 }
9210
9211 // Create a virtual register in *TLSBaseAddrReg, and populate it by
9212 // inserting a copy instruction after I. Returns the new instruction.
9213 MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
9214 MachineFunction *MF = I.getParent()->getParent();
9215 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
9216 const bool is64Bit = STI.is64Bit();
9217 const X86InstrInfo *TII = STI.getInstrInfo();
9218
9219 // Create a virtual register for the TLS base address.
9220 MachineRegisterInfo &RegInfo = MF->getRegInfo();
9221 *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
9222 ? &X86::GR64RegClass
9223 : &X86::GR32RegClass);
9224
9225 // Insert a copy from RAX/EAX to TLSBaseAddrReg.
9226 MachineInstr *Next = I.getNextNode();
9227 MachineInstr *Copy =
9228 BuildMI(*I.getParent(), Next, I.getDebugLoc(),
9229 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
9230 .addReg(is64Bit ? X86::RAX : X86::EAX);
9231
9232 return Copy;
9233 }
9234
9235 StringRef getPassName() const override {
9236 return "Local Dynamic TLS Access Clean-up";
9237 }
9238
9239 void getAnalysisUsage(AnalysisUsage &AU) const override {
9240 AU.setPreservesCFG();
9241 AU.addRequired<MachineDominatorTree>();
9242 MachineFunctionPass::getAnalysisUsage(AU);
9243 }
9244 };
9245}
9246
9247char LDTLSCleanup::ID = 0;
9248FunctionPass*
9249llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
9250
9251/// Constants defining how certain sequences should be outlined.
9252///
9253/// \p MachineOutlinerDefault implies that the function is called with a call
9254/// instruction, and a return must be emitted for the outlined function frame.
9255///
9256/// That is,
9257///
9258 ///   I1                                OUTLINED_FUNCTION:
9259 ///   I2 --> call OUTLINED_FUNCTION     I1
9260 ///   I3                                I2
9261 ///                                     I3
9262 ///                                     ret
9263///
9264/// * Call construction overhead: 1 (call instruction)
9265/// * Frame construction overhead: 1 (return instruction)
9266///
9267/// \p MachineOutlinerTailCall implies that the function is being tail called.
9268/// A jump is emitted instead of a call, and the return is already present in
9269/// the outlined sequence. That is,
9270///
9271 ///   I1                                OUTLINED_FUNCTION:
9272 ///   I2 --> jmp OUTLINED_FUNCTION      I1
9273 ///   ret                               I2
9274 ///                                     ret
9275///
9276/// * Call construction overhead: 1 (jump instruction)
9277/// * Frame construction overhead: 0 (don't need to return)
9278///
9279enum MachineOutlinerClass {
9280 MachineOutlinerDefault,
9281 MachineOutlinerTailCall
9282};
9283
9284outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
9285 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
9286 unsigned SequenceSize =
9287 std::accumulate(RepeatedSequenceLocs[0].front(),
9288 std::next(RepeatedSequenceLocs[0].back()), 0,
9289 [](unsigned Sum, const MachineInstr &MI) {
9290 // FIXME: x86 doesn't implement getInstSizeInBytes, so
9291 // we can't tell the cost. Just assume each instruction
9292 // is one byte.
9293 if (MI.isDebugInstr() || MI.isKill())
9294 return Sum;
9295 return Sum + 1;
9296 });
9297
9298 // We check to see if CFI instructions are present, and if they are,
9299 // we count the number of CFI instructions in the candidate sequence.
9300 unsigned CFICount = 0;
9301 MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
9302 for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
9303 Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
9304 if (MBBI->isCFIInstruction())
9305 CFICount++;
9306 MBBI++;
9307 }
9308
9309 // We compare the number of found CFI Instructions to the number of CFI
9310 // instructions in the parent function for each candidate. We must check this
9311 // since if we outline one of the CFI instructions in a function, we have to
9312 // outline them all for correctness. If we do not, the address offsets will be
9313 // incorrect between the two sections of the program.
9314 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9315 std::vector<MCCFIInstruction> CFIInstructions =
9316 C.getMF()->getFrameInstructions();
9317
9318 if (CFICount > 0 && CFICount != CFIInstructions.size())
9319 return outliner::OutlinedFunction();
9320 }
9321
9322 // FIXME: Use real size in bytes for call and ret instructions.
9323 if (RepeatedSequenceLocs[0].back()->isTerminator()) {
9324 for (outliner::Candidate &C : RepeatedSequenceLocs)
9325 C.setCallInfo(MachineOutlinerTailCall, 1);
9326
9327 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
9328 0, // Number of bytes to emit frame.
9329 MachineOutlinerTailCall // Type of frame.
9330 );
9331 }
9332
9333 if (CFICount > 0)
9334 return outliner::OutlinedFunction();
9335
9336 for (outliner::Candidate &C : RepeatedSequenceLocs)
9337 C.setCallInfo(MachineOutlinerDefault, 1);
9338
9339 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 1,
9340 MachineOutlinerDefault);
9341}
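// A small worked example, using the 1-byte-per-instruction approximation from
// the FIXME above: outlining a repeated 5-instruction sequence that ends in a
// terminator gives SequenceSize = 5 with call overhead 1 (the jmp) and frame
// overhead 0, while the non-terminator case gets call overhead 1 (the call) and
// frame overhead 1 (the retq added in buildOutlinedFrame).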
9342
9343bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
9344 bool OutlineFromLinkOnceODRs) const {
9345 const Function &F = MF.getFunction();
9346
9347 // Does the function use a red zone? If it does, then we can't risk messing
9348 // with the stack.
9349 if (Subtarget.getFrameLowering()->has128ByteRedZone(MF)) {
9350 // It could have a red zone. If it does, then we don't want to touch it.
9351 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
9352 if (!X86FI || X86FI->getUsesRedZone())
9353 return false;
9354 }
9355
9356 // If we *don't* want to outline from things that could potentially be deduped
9357 // then return false.
9358 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9359 return false;
9360
9361 // This function is viable for outlining, so return true.
9362 return true;
9363}
9364
9365outliner::InstrType
9366X86InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
9367 MachineInstr &MI = *MIT;
9368 // Don't allow debug values to impact outlining type.
9369 if (MI.isDebugInstr() || MI.isIndirectDebugValue())
9370 return outliner::InstrType::Invisible;
9371
9372 // At this point, KILL instructions don't really tell us much so we can go
9373 // ahead and skip over them.
9374 if (MI.isKill())
9375 return outliner::InstrType::Invisible;
9376
9377 // Is this a tail call? If yes, we can outline as a tail call.
9378 if (isTailCall(MI))
9379 return outliner::InstrType::Legal;
9380
9381 // Is this the terminator of a basic block?
9382 if (MI.isTerminator() || MI.isReturn()) {
9383
9384 // Does its parent have any successors in its MachineFunction?
9385 if (MI.getParent()->succ_empty())
9386 return outliner::InstrType::Legal;
9387
9388 // It does, so we can't tail call it.
9389 return outliner::InstrType::Illegal;
9390 }
9391
9392 // Don't outline anything that modifies or reads from the stack pointer.
9393 //
9394 // FIXME: There are instructions which are being manually built without
9395 // explicit uses/defs so we also have to check the MCInstrDesc. We should be
9396 // able to remove the extra checks once those are fixed up. For example,
9397 // sometimes we might get something like %rax = POP64r 1. This won't be
9398 // caught by modifiesRegister or readsRegister even though the instruction
9399 // really ought to be formed so that modifiesRegister/readsRegister would
9400 // catch it.
9401 if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
9402 MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
9403 MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
9404 return outliner::InstrType::Illegal;
9405
9406 // Outlined calls change the instruction pointer, so don't read from it.
9407 if (MI.readsRegister(X86::RIP, &RI) ||
9408 MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
9409 MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
9410 return outliner::InstrType::Illegal;
9411
9412 // Positions can't safely be outlined.
9413 if (MI.isPosition())
9414 return outliner::InstrType::Illegal;
9415
9416 // Make sure none of the operands of this instruction do anything tricky.
9417 for (const MachineOperand &MOP : MI.operands())
9418 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
9419 MOP.isTargetIndex())
9420 return outliner::InstrType::Illegal;
9421
9422 return outliner::InstrType::Legal;
9423}
9424
9425void X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB,
9426 MachineFunction &MF,
9427 const outliner::OutlinedFunction &OF)
9428 const {
9429 // If we're a tail call, we already have a return, so don't do anything.
9430 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9431 return;
9432
9433 // We're a normal call, so our sequence doesn't have a return instruction.
9434 // Add it in.
9435 MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RET64));
9436 MBB.insert(MBB.end(), retq);
9437}
9438
9439MachineBasicBlock::iterator
9440X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
9441 MachineBasicBlock::iterator &It,
9442 MachineFunction &MF,
9443 const outliner::Candidate &C) const {
9444 // Is it a tail call?
9445 if (C.CallConstructionID == MachineOutlinerTailCall) {
9446 // Yes, just insert a JMP.
9447 It = MBB.insert(It,
9448 BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
9449 .addGlobalAddress(M.getNamedValue(MF.getName())));
9450 } else {
9451 // No, insert a call.
9452 It = MBB.insert(It,
9453 BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
9454 .addGlobalAddress(M.getNamedValue(MF.getName())));
9455 }
9456
9457 return It;
9458}
9459
9460#define GET_INSTRINFO_HELPERS
9461#include "X86GenInstrInfo.inc"