1//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86InstrInfo.h"
14#include "X86.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrFoldTables.h"
17#include "X86MachineFunctionInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Sequence.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Module.h"
40#include "llvm/MC/MCAsmInfo.h"
41#include "llvm/MC/MCExpr.h"
42#include "llvm/MC/MCInst.h"
44#include "llvm/Support/Debug.h"
48#include <optional>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "x86-instr-info"
53
54#define GET_INSTRINFO_CTOR_DTOR
55#include "X86GenInstrInfo.inc"
56
57static cl::opt<bool>
58 NoFusing("disable-spill-fusing",
59 cl::desc("Disable fusing of spill code into instructions"),
60 cl::Hidden);
61static cl::opt<bool>
62 PrintFailedFusing("print-failed-fuse-candidates",
63 cl::desc("Print instructions that the allocator wants to"
64 " fuse, but the X86 backend currently can't"),
66static cl::opt<bool>
67 ReMatPICStubLoad("remat-pic-stub-load",
68 cl::desc("Re-materialize load from stub in PIC mode"),
69 cl::init(false), cl::Hidden);
70static cl::opt<unsigned>
71 PartialRegUpdateClearance("partial-reg-update-clearance",
72 cl::desc("Clearance between two register writes "
73 "for inserting XOR to avoid partial "
74 "register update"),
75 cl::init(64), cl::Hidden);
77 "undef-reg-clearance",
78 cl::desc("How many idle instructions we would like before "
79 "certain undef register reads"),
80 cl::init(128), cl::Hidden);
81
82// Pin the vtable to this file.
83void X86InstrInfo::anchor() {}
84
85X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
86 : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
87 : X86::ADJCALLSTACKDOWN32),
88 (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
89 : X86::ADJCALLSTACKUP32),
90 X86::CATCHRET, (STI.is64Bit() ? X86::RET64 : X86::RET32)),
91 Subtarget(STI), RI(STI.getTargetTriple()) {}
92
93const TargetRegisterClass *
94X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
95 const TargetRegisterInfo *TRI,
96 const MachineFunction &MF) const {
97 auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF);
98 // If the target does not have EGPR, then r16-r31 will be reserved for all
99 // instructions.
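// (EGPR here refers to the APX extended general-purpose registers r16-r31.)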
100 if (!RC || !Subtarget.hasEGPR())
101 return RC;
102
103 if (X86II::canUseApxExtendedReg(MCID))
104 return RC;
105
106 switch (RC->getID()) {
107 default:
108 return RC;
109 case X86::GR8RegClassID:
110 return &X86::GR8_NOREX2RegClass;
111 case X86::GR16RegClassID:
112 return &X86::GR16_NOREX2RegClass;
113 case X86::GR32RegClassID:
114 return &X86::GR32_NOREX2RegClass;
115 case X86::GR64RegClassID:
116 return &X86::GR64_NOREX2RegClass;
117 case X86::GR32_NOSPRegClassID:
118 return &X86::GR32_NOREX2_NOSPRegClass;
119 case X86::GR64_NOSPRegClassID:
120 return &X86::GR64_NOREX2_NOSPRegClass;
121 }
122}
123
124bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
125 Register &SrcReg, Register &DstReg,
126 unsigned &SubIdx) const {
127 switch (MI.getOpcode()) {
128 default:
129 break;
130 case X86::MOVSX16rr8:
131 case X86::MOVZX16rr8:
132 case X86::MOVSX32rr8:
133 case X86::MOVZX32rr8:
134 case X86::MOVSX64rr8:
135 if (!Subtarget.is64Bit())
136 // It's not always legal to reference the low 8 bits of the larger
137 // register in 32-bit mode.
138 return false;
139 [[fallthrough]];
140 case X86::MOVSX32rr16:
141 case X86::MOVZX32rr16:
142 case X86::MOVSX64rr16:
143 case X86::MOVSX64rr32: {
144 if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
145 // Be conservative.
146 return false;
147 SrcReg = MI.getOperand(1).getReg();
148 DstReg = MI.getOperand(0).getReg();
149 switch (MI.getOpcode()) {
150 default:
151 llvm_unreachable("Unreachable!");
152 case X86::MOVSX16rr8:
153 case X86::MOVZX16rr8:
154 case X86::MOVSX32rr8:
155 case X86::MOVZX32rr8:
156 case X86::MOVSX64rr8:
157 SubIdx = X86::sub_8bit;
158 break;
159 case X86::MOVSX32rr16:
160 case X86::MOVZX32rr16:
161 case X86::MOVSX64rr16:
162 SubIdx = X86::sub_16bit;
163 break;
164 case X86::MOVSX64rr32:
165 SubIdx = X86::sub_32bit;
166 break;
167 }
168 return true;
169 }
170 }
171 return false;
172}
173
174bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
175 if (MI.mayLoad() || MI.mayStore())
176 return false;
177
178 // Some target-independent operations that trivially lower to data-invariant
179 // instructions.
180 if (MI.isCopyLike() || MI.isInsertSubreg())
181 return true;
182
183 unsigned Opcode = MI.getOpcode();
184 using namespace X86;
185 // On x86 it is believed that imul is constant time w.r.t. its input data.
186 // However, imul instructions set flags and are perhaps the most surprisingly
187 // constant-time operations, so we call them out here separately.
188 if (isIMUL(Opcode))
189 return true;
190 // Bit scanning and counting instructions scan across bits and do other
191 // fairly complex operations (like popcnt), yet are somewhat surprisingly
192 // believed to be constant time on x86.
193 // However, these set flags.
194 if (isBSF(Opcode) || isBSR(Opcode) || isLZCNT(Opcode) || isPOPCNT(Opcode) ||
195 isTZCNT(Opcode))
196 return true;
197 // Bit manipulation instructions are effectively combinations of basic
198 // arithmetic ops, and should still execute in constant time. These also
199 // set flags.
200 if (isBLCFILL(Opcode) || isBLCI(Opcode) || isBLCIC(Opcode) ||
201 isBLCMSK(Opcode) || isBLCS(Opcode) || isBLSFILL(Opcode) ||
202 isBLSI(Opcode) || isBLSIC(Opcode) || isBLSMSK(Opcode) || isBLSR(Opcode) ||
203 isTZMSK(Opcode))
204 return true;
205 // Bit extracting and clearing instructions should execute in constant time,
206 // and set flags.
207 if (isBEXTR(Opcode) || isBZHI(Opcode))
208 return true;
209 // Shift and rotate.
210 if (isROL(Opcode) || isROR(Opcode) || isSAR(Opcode) || isSHL(Opcode) ||
211 isSHR(Opcode) || isSHLD(Opcode) || isSHRD(Opcode))
212 return true;
213 // Basic arithmetic is constant time on the input but does set flags.
214 if (isADC(Opcode) || isADD(Opcode) || isAND(Opcode) || isOR(Opcode) ||
215 isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode))
216 return true;
217 // Arithmetic with just 32-bit and 64-bit variants and no immediates.
218 if (isANDN(Opcode))
219 return true;
220 // Unary arithmetic operations.
221 if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode))
222 return true;
223 // Unlike other arithmetic, NOT doesn't set EFLAGS.
224 if (isNOT(Opcode))
225 return true;
226 // Various move instructions used to zero or sign extend things. Note that we
227 // intentionally don't support the _NOREX variants as we can't handle that
228 // register constraint anyways.
229 if (isMOVSX(Opcode) || isMOVZX(Opcode) || isMOVSXD(Opcode) || isMOV(Opcode))
230 return true;
231 // Arithmetic instructions that are both constant time and don't set flags.
232 if (isRORX(Opcode) || isSARX(Opcode) || isSHLX(Opcode) || isSHRX(Opcode))
233 return true;
234 // LEA doesn't actually access memory, and its arithmetic is constant time.
235 if (isLEA(Opcode))
236 return true;
237 // By default, assume that the instruction is not data invariant.
238 return false;
239}
240
241bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
242 switch (MI.getOpcode()) {
243 default:
244 // By default, assume that the load will immediately leak.
245 return false;
246
247 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
248 // However, these instructions set flags and are perhaps the most surprisingly
249 // constant-time operations, so we call them out here separately.
250 case X86::IMUL16rm:
251 case X86::IMUL16rmi:
252 case X86::IMUL32rm:
253 case X86::IMUL32rmi:
254 case X86::IMUL64rm:
255 case X86::IMUL64rmi32:
256
257 // Bit scanning and counting instructions scan across bits and do other
258 // fairly complex operations (like popcnt), yet are somewhat surprisingly
259 // believed to be constant time on x86.
260 // However, these set flags.
261 case X86::BSF16rm:
262 case X86::BSF32rm:
263 case X86::BSF64rm:
264 case X86::BSR16rm:
265 case X86::BSR32rm:
266 case X86::BSR64rm:
267 case X86::LZCNT16rm:
268 case X86::LZCNT32rm:
269 case X86::LZCNT64rm:
270 case X86::POPCNT16rm:
271 case X86::POPCNT32rm:
272 case X86::POPCNT64rm:
273 case X86::TZCNT16rm:
274 case X86::TZCNT32rm:
275 case X86::TZCNT64rm:
276
277 // Bit manipulation instructions are effectively combinations of basic
278 // arithmetic ops, and should still execute in constant time. These also
279 // set flags.
280 case X86::BLCFILL32rm:
281 case X86::BLCFILL64rm:
282 case X86::BLCI32rm:
283 case X86::BLCI64rm:
284 case X86::BLCIC32rm:
285 case X86::BLCIC64rm:
286 case X86::BLCMSK32rm:
287 case X86::BLCMSK64rm:
288 case X86::BLCS32rm:
289 case X86::BLCS64rm:
290 case X86::BLSFILL32rm:
291 case X86::BLSFILL64rm:
292 case X86::BLSI32rm:
293 case X86::BLSI64rm:
294 case X86::BLSIC32rm:
295 case X86::BLSIC64rm:
296 case X86::BLSMSK32rm:
297 case X86::BLSMSK64rm:
298 case X86::BLSR32rm:
299 case X86::BLSR64rm:
300 case X86::TZMSK32rm:
301 case X86::TZMSK64rm:
302
303 // Bit extracting and clearing instructions should execute in constant time,
304 // and set flags.
305 case X86::BEXTR32rm:
306 case X86::BEXTR64rm:
307 case X86::BEXTRI32mi:
308 case X86::BEXTRI64mi:
309 case X86::BZHI32rm:
310 case X86::BZHI64rm:
311
312 // Basic arithmetic is constant time on the input but does set flags.
313 case X86::ADC8rm:
314 case X86::ADC16rm:
315 case X86::ADC32rm:
316 case X86::ADC64rm:
317 case X86::ADD8rm:
318 case X86::ADD16rm:
319 case X86::ADD32rm:
320 case X86::ADD64rm:
321 case X86::AND8rm:
322 case X86::AND16rm:
323 case X86::AND32rm:
324 case X86::AND64rm:
325 case X86::ANDN32rm:
326 case X86::ANDN64rm:
327 case X86::OR8rm:
328 case X86::OR16rm:
329 case X86::OR32rm:
330 case X86::OR64rm:
331 case X86::SBB8rm:
332 case X86::SBB16rm:
333 case X86::SBB32rm:
334 case X86::SBB64rm:
335 case X86::SUB8rm:
336 case X86::SUB16rm:
337 case X86::SUB32rm:
338 case X86::SUB64rm:
339 case X86::XOR8rm:
340 case X86::XOR16rm:
341 case X86::XOR32rm:
342 case X86::XOR64rm:
343
344 // Integer multiply w/o affecting flags is still believed to be constant
345 // time on x86. Called out separately as this is among the most surprising
346 // instructions to exhibit that behavior.
347 case X86::MULX32rm:
348 case X86::MULX64rm:
349
350 // Arithmetic instructions that are both constant time and don't set flags.
351 case X86::RORX32mi:
352 case X86::RORX64mi:
353 case X86::SARX32rm:
354 case X86::SARX64rm:
355 case X86::SHLX32rm:
356 case X86::SHLX64rm:
357 case X86::SHRX32rm:
358 case X86::SHRX64rm:
359
360 // Conversions are believed to be constant time and don't set flags.
361 case X86::CVTTSD2SI64rm:
362 case X86::VCVTTSD2SI64rm:
363 case X86::VCVTTSD2SI64Zrm:
364 case X86::CVTTSD2SIrm:
365 case X86::VCVTTSD2SIrm:
366 case X86::VCVTTSD2SIZrm:
367 case X86::CVTTSS2SI64rm:
368 case X86::VCVTTSS2SI64rm:
369 case X86::VCVTTSS2SI64Zrm:
370 case X86::CVTTSS2SIrm:
371 case X86::VCVTTSS2SIrm:
372 case X86::VCVTTSS2SIZrm:
373 case X86::CVTSI2SDrm:
374 case X86::VCVTSI2SDrm:
375 case X86::VCVTSI2SDZrm:
376 case X86::CVTSI2SSrm:
377 case X86::VCVTSI2SSrm:
378 case X86::VCVTSI2SSZrm:
379 case X86::CVTSI642SDrm:
380 case X86::VCVTSI642SDrm:
381 case X86::VCVTSI642SDZrm:
382 case X86::CVTSI642SSrm:
383 case X86::VCVTSI642SSrm:
384 case X86::VCVTSI642SSZrm:
385 case X86::CVTSS2SDrm:
386 case X86::VCVTSS2SDrm:
387 case X86::VCVTSS2SDZrm:
388 case X86::CVTSD2SSrm:
389 case X86::VCVTSD2SSrm:
390 case X86::VCVTSD2SSZrm:
391 // AVX512 added unsigned integer conversions.
392 case X86::VCVTTSD2USI64Zrm:
393 case X86::VCVTTSD2USIZrm:
394 case X86::VCVTTSS2USI64Zrm:
395 case X86::VCVTTSS2USIZrm:
396 case X86::VCVTUSI2SDZrm:
397 case X86::VCVTUSI642SDZrm:
398 case X86::VCVTUSI2SSZrm:
399 case X86::VCVTUSI642SSZrm:
400
401 // Loads to register don't set flags.
402 case X86::MOV8rm:
403 case X86::MOV8rm_NOREX:
404 case X86::MOV16rm:
405 case X86::MOV32rm:
406 case X86::MOV64rm:
407 case X86::MOVSX16rm8:
408 case X86::MOVSX32rm16:
409 case X86::MOVSX32rm8:
410 case X86::MOVSX32rm8_NOREX:
411 case X86::MOVSX64rm16:
412 case X86::MOVSX64rm32:
413 case X86::MOVSX64rm8:
414 case X86::MOVZX16rm8:
415 case X86::MOVZX32rm16:
416 case X86::MOVZX32rm8:
417 case X86::MOVZX32rm8_NOREX:
418 case X86::MOVZX64rm16:
419 case X86::MOVZX64rm8:
420 return true;
421 }
422}
423
424int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
425 const MachineFunction *MF = MI.getParent()->getParent();
426 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
427
428 if (isFrameInstr(MI)) {
429 int SPAdj = alignTo(getFrameSize(MI), TFI->getStackAlign());
430 SPAdj -= getFrameAdjustment(MI);
431 if (!isFrameSetup(MI))
432 SPAdj = -SPAdj;
433 return SPAdj;
434 }
435
436 // To know whether a call adjusts the stack, we need information
437 // that is bound to the following ADJCALLSTACKUP pseudo.
438 // Look for the next ADJCALLSTACKUP that follows the call.
439 if (MI.isCall()) {
440 const MachineBasicBlock *MBB = MI.getParent();
441 auto I = ++MachineBasicBlock::const_iterator(MI);
442 for (auto E = MBB->end(); I != E; ++I) {
443 if (I->getOpcode() == getCallFrameDestroyOpcode() || I->isCall())
444 break;
445 }
446
447 // If we could not find a frame destroy opcode, then it has already
448 // been simplified, so we don't care.
449 if (I->getOpcode() != getCallFrameDestroyOpcode())
450 return 0;
451
452 return -(I->getOperand(1).getImm());
453 }
454
455 // Currently handle only PUSHes we can reasonably expect to see
456 // in call sequences
457 switch (MI.getOpcode()) {
458 default:
459 return 0;
460 case X86::PUSH32r:
461 case X86::PUSH32rmm:
462 case X86::PUSH32rmr:
463 case X86::PUSH32i:
464 return 4;
465 case X86::PUSH64r:
466 case X86::PUSH64rmm:
467 case X86::PUSH64rmr:
468 case X86::PUSH64i32:
469 return 8;
470 }
471}
472
473/// Return true and the FrameIndex if the specified
474/// operand and the following operands form a reference to the stack frame.
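/// Concretely, this matches a memory operand of the form [frame-index + 0]:
/// scale 1, no index register, and zero displacement.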
475bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
476 int &FrameIndex) const {
477 if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
478 MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
479 MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
480 MI.getOperand(Op + X86::AddrDisp).isImm() &&
481 MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
482 MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
483 MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
484 FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
485 return true;
486 }
487 return false;
488}
489
490static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
491 switch (Opcode) {
492 default:
493 return false;
494 case X86::MOV8rm:
495 case X86::KMOVBkm:
496 case X86::KMOVBkm_EVEX:
497 MemBytes = 1;
498 return true;
499 case X86::MOV16rm:
500 case X86::KMOVWkm:
501 case X86::KMOVWkm_EVEX:
502 case X86::VMOVSHZrm:
503 case X86::VMOVSHZrm_alt:
504 MemBytes = 2;
505 return true;
506 case X86::MOV32rm:
507 case X86::MOVSSrm:
508 case X86::MOVSSrm_alt:
509 case X86::VMOVSSrm:
510 case X86::VMOVSSrm_alt:
511 case X86::VMOVSSZrm:
512 case X86::VMOVSSZrm_alt:
513 case X86::KMOVDkm:
514 case X86::KMOVDkm_EVEX:
515 MemBytes = 4;
516 return true;
517 case X86::MOV64rm:
518 case X86::LD_Fp64m:
519 case X86::MOVSDrm:
520 case X86::MOVSDrm_alt:
521 case X86::VMOVSDrm:
522 case X86::VMOVSDrm_alt:
523 case X86::VMOVSDZrm:
524 case X86::VMOVSDZrm_alt:
525 case X86::MMX_MOVD64rm:
526 case X86::MMX_MOVQ64rm:
527 case X86::KMOVQkm:
528 case X86::KMOVQkm_EVEX:
529 MemBytes = 8;
530 return true;
531 case X86::MOVAPSrm:
532 case X86::MOVUPSrm:
533 case X86::MOVAPDrm:
534 case X86::MOVUPDrm:
535 case X86::MOVDQArm:
536 case X86::MOVDQUrm:
537 case X86::VMOVAPSrm:
538 case X86::VMOVUPSrm:
539 case X86::VMOVAPDrm:
540 case X86::VMOVUPDrm:
541 case X86::VMOVDQArm:
542 case X86::VMOVDQUrm:
543 case X86::VMOVAPSZ128rm:
544 case X86::VMOVUPSZ128rm:
545 case X86::VMOVAPSZ128rm_NOVLX:
546 case X86::VMOVUPSZ128rm_NOVLX:
547 case X86::VMOVAPDZ128rm:
548 case X86::VMOVUPDZ128rm:
549 case X86::VMOVDQU8Z128rm:
550 case X86::VMOVDQU16Z128rm:
551 case X86::VMOVDQA32Z128rm:
552 case X86::VMOVDQU32Z128rm:
553 case X86::VMOVDQA64Z128rm:
554 case X86::VMOVDQU64Z128rm:
555 MemBytes = 16;
556 return true;
557 case X86::VMOVAPSYrm:
558 case X86::VMOVUPSYrm:
559 case X86::VMOVAPDYrm:
560 case X86::VMOVUPDYrm:
561 case X86::VMOVDQAYrm:
562 case X86::VMOVDQUYrm:
563 case X86::VMOVAPSZ256rm:
564 case X86::VMOVUPSZ256rm:
565 case X86::VMOVAPSZ256rm_NOVLX:
566 case X86::VMOVUPSZ256rm_NOVLX:
567 case X86::VMOVAPDZ256rm:
568 case X86::VMOVUPDZ256rm:
569 case X86::VMOVDQU8Z256rm:
570 case X86::VMOVDQU16Z256rm:
571 case X86::VMOVDQA32Z256rm:
572 case X86::VMOVDQU32Z256rm:
573 case X86::VMOVDQA64Z256rm:
574 case X86::VMOVDQU64Z256rm:
575 MemBytes = 32;
576 return true;
577 case X86::VMOVAPSZrm:
578 case X86::VMOVUPSZrm:
579 case X86::VMOVAPDZrm:
580 case X86::VMOVUPDZrm:
581 case X86::VMOVDQU8Zrm:
582 case X86::VMOVDQU16Zrm:
583 case X86::VMOVDQA32Zrm:
584 case X86::VMOVDQU32Zrm:
585 case X86::VMOVDQA64Zrm:
586 case X86::VMOVDQU64Zrm:
587 MemBytes = 64;
588 return true;
589 }
590}
591
592static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
593 switch (Opcode) {
594 default:
595 return false;
596 case X86::MOV8mr:
597 case X86::KMOVBmk:
598 case X86::KMOVBmk_EVEX:
599 MemBytes = 1;
600 return true;
601 case X86::MOV16mr:
602 case X86::KMOVWmk:
603 case X86::KMOVWmk_EVEX:
604 case X86::VMOVSHZmr:
605 MemBytes = 2;
606 return true;
607 case X86::MOV32mr:
608 case X86::MOVSSmr:
609 case X86::VMOVSSmr:
610 case X86::VMOVSSZmr:
611 case X86::KMOVDmk:
612 case X86::KMOVDmk_EVEX:
613 MemBytes = 4;
614 return true;
615 case X86::MOV64mr:
616 case X86::ST_FpP64m:
617 case X86::MOVSDmr:
618 case X86::VMOVSDmr:
619 case X86::VMOVSDZmr:
620 case X86::MMX_MOVD64mr:
621 case X86::MMX_MOVQ64mr:
622 case X86::MMX_MOVNTQmr:
623 case X86::KMOVQmk:
624 case X86::KMOVQmk_EVEX:
625 MemBytes = 8;
626 return true;
627 case X86::MOVAPSmr:
628 case X86::MOVUPSmr:
629 case X86::MOVAPDmr:
630 case X86::MOVUPDmr:
631 case X86::MOVDQAmr:
632 case X86::MOVDQUmr:
633 case X86::VMOVAPSmr:
634 case X86::VMOVUPSmr:
635 case X86::VMOVAPDmr:
636 case X86::VMOVUPDmr:
637 case X86::VMOVDQAmr:
638 case X86::VMOVDQUmr:
639 case X86::VMOVUPSZ128mr:
640 case X86::VMOVAPSZ128mr:
641 case X86::VMOVUPSZ128mr_NOVLX:
642 case X86::VMOVAPSZ128mr_NOVLX:
643 case X86::VMOVUPDZ128mr:
644 case X86::VMOVAPDZ128mr:
645 case X86::VMOVDQA32Z128mr:
646 case X86::VMOVDQU32Z128mr:
647 case X86::VMOVDQA64Z128mr:
648 case X86::VMOVDQU64Z128mr:
649 case X86::VMOVDQU8Z128mr:
650 case X86::VMOVDQU16Z128mr:
651 MemBytes = 16;
652 return true;
653 case X86::VMOVUPSYmr:
654 case X86::VMOVAPSYmr:
655 case X86::VMOVUPDYmr:
656 case X86::VMOVAPDYmr:
657 case X86::VMOVDQUYmr:
658 case X86::VMOVDQAYmr:
659 case X86::VMOVUPSZ256mr:
660 case X86::VMOVAPSZ256mr:
661 case X86::VMOVUPSZ256mr_NOVLX:
662 case X86::VMOVAPSZ256mr_NOVLX:
663 case X86::VMOVUPDZ256mr:
664 case X86::VMOVAPDZ256mr:
665 case X86::VMOVDQU8Z256mr:
666 case X86::VMOVDQU16Z256mr:
667 case X86::VMOVDQA32Z256mr:
668 case X86::VMOVDQU32Z256mr:
669 case X86::VMOVDQA64Z256mr:
670 case X86::VMOVDQU64Z256mr:
671 MemBytes = 32;
672 return true;
673 case X86::VMOVUPSZmr:
674 case X86::VMOVAPSZmr:
675 case X86::VMOVUPDZmr:
676 case X86::VMOVAPDZmr:
677 case X86::VMOVDQU8Zmr:
678 case X86::VMOVDQU16Zmr:
679 case X86::VMOVDQA32Zmr:
680 case X86::VMOVDQU32Zmr:
681 case X86::VMOVDQA64Zmr:
682 case X86::VMOVDQU64Zmr:
683 MemBytes = 64;
684 return true;
685 }
686 return false;
687}
688
690 int &FrameIndex) const {
691 unsigned Dummy;
692 return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
693}
694
696 int &FrameIndex,
697 unsigned &MemBytes) const {
698 if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
699 if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
700 return MI.getOperand(0).getReg();
701 return 0;
702}
703
705 int &FrameIndex) const {
706 unsigned Dummy;
707 if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
708 unsigned Reg;
709 if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
710 return Reg;
711 // Check for post-frame index elimination operations
712 SmallVector<const MachineMemOperand *, 1> Accesses;
713 if (hasLoadFromStackSlot(MI, Accesses)) {
714 FrameIndex =
715 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
716 ->getFrameIndex();
717 return MI.getOperand(0).getReg();
718 }
719 }
720 return 0;
721}
722
724 int &FrameIndex) const {
725 unsigned Dummy;
726 return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
727}
728
730 int &FrameIndex,
731 unsigned &MemBytes) const {
732 if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
733 if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
734 isFrameOperand(MI, 0, FrameIndex))
735 return MI.getOperand(X86::AddrNumOperands).getReg();
736 return 0;
737}
738
740 int &FrameIndex) const {
741 unsigned Dummy;
742 if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
743 unsigned Reg;
744 if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
745 return Reg;
746 // Check for post-frame index elimination operations
747 SmallVector<const MachineMemOperand *, 1> Accesses;
748 if (hasStoreToStackSlot(MI, Accesses)) {
749 FrameIndex =
750 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
751 ->getFrameIndex();
752 return MI.getOperand(X86::AddrNumOperands).getReg();
753 }
754 }
755 return 0;
756}
757
758/// Return true if register is PIC base; i.e., defined by X86::MOVPC32r.
759static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
760 // Don't waste compile time scanning use-def chains of physregs.
761 if (!BaseReg.isVirtual())
762 return false;
763 bool isPICBase = false;
764 for (const MachineInstr &DefMI : MRI.def_instructions(BaseReg)) {
765 if (DefMI.getOpcode() != X86::MOVPC32r)
766 return false;
767 assert(!isPICBase && "More than one PIC base?");
768 isPICBase = true;
769 }
770 return isPICBase;
771}
772
773bool X86InstrInfo::isReallyTriviallyReMaterializable(
774 const MachineInstr &MI) const {
775 switch (MI.getOpcode()) {
776 default:
777 // This function should only be called for opcodes with the ReMaterializable
778 // flag set.
779 llvm_unreachable("Unknown rematerializable operation!");
780 break;
781 case X86::IMPLICIT_DEF:
782 // Defer to generic logic.
783 break;
784 case X86::LOAD_STACK_GUARD:
785 case X86::LD_Fp032:
786 case X86::LD_Fp064:
787 case X86::LD_Fp080:
788 case X86::LD_Fp132:
789 case X86::LD_Fp164:
790 case X86::LD_Fp180:
791 case X86::AVX1_SETALLONES:
792 case X86::AVX2_SETALLONES:
793 case X86::AVX512_128_SET0:
794 case X86::AVX512_256_SET0:
795 case X86::AVX512_512_SET0:
796 case X86::AVX512_512_SETALLONES:
797 case X86::AVX512_FsFLD0SD:
798 case X86::AVX512_FsFLD0SH:
799 case X86::AVX512_FsFLD0SS:
800 case X86::AVX512_FsFLD0F128:
801 case X86::AVX_SET0:
802 case X86::FsFLD0SD:
803 case X86::FsFLD0SS:
804 case X86::FsFLD0SH:
805 case X86::FsFLD0F128:
806 case X86::KSET0D:
807 case X86::KSET0Q:
808 case X86::KSET0W:
809 case X86::KSET1D:
810 case X86::KSET1Q:
811 case X86::KSET1W:
812 case X86::MMX_SET0:
813 case X86::MOV32ImmSExti8:
814 case X86::MOV32r0:
815 case X86::MOV32r1:
816 case X86::MOV32r_1:
817 case X86::MOV32ri64:
818 case X86::MOV64ImmSExti8:
819 case X86::V_SET0:
820 case X86::V_SETALLONES:
821 case X86::MOV16ri:
822 case X86::MOV32ri:
823 case X86::MOV64ri:
824 case X86::MOV64ri32:
825 case X86::MOV8ri:
826 case X86::PTILEZEROV:
827 return true;
828
829 case X86::MOV8rm:
830 case X86::MOV8rm_NOREX:
831 case X86::MOV16rm:
832 case X86::MOV32rm:
833 case X86::MOV64rm:
834 case X86::MOVSSrm:
835 case X86::MOVSSrm_alt:
836 case X86::MOVSDrm:
837 case X86::MOVSDrm_alt:
838 case X86::MOVAPSrm:
839 case X86::MOVUPSrm:
840 case X86::MOVAPDrm:
841 case X86::MOVUPDrm:
842 case X86::MOVDQArm:
843 case X86::MOVDQUrm:
844 case X86::VMOVSSrm:
845 case X86::VMOVSSrm_alt:
846 case X86::VMOVSDrm:
847 case X86::VMOVSDrm_alt:
848 case X86::VMOVAPSrm:
849 case X86::VMOVUPSrm:
850 case X86::VMOVAPDrm:
851 case X86::VMOVUPDrm:
852 case X86::VMOVDQArm:
853 case X86::VMOVDQUrm:
854 case X86::VMOVAPSYrm:
855 case X86::VMOVUPSYrm:
856 case X86::VMOVAPDYrm:
857 case X86::VMOVUPDYrm:
858 case X86::VMOVDQAYrm:
859 case X86::VMOVDQUYrm:
860 case X86::MMX_MOVD64rm:
861 case X86::MMX_MOVQ64rm:
862 case X86::VBROADCASTSSrm:
863 case X86::VBROADCASTSSYrm:
864 case X86::VBROADCASTSDYrm:
865 // AVX-512
866 case X86::VPBROADCASTBZ128rm:
867 case X86::VPBROADCASTBZ256rm:
868 case X86::VPBROADCASTBZrm:
869 case X86::VBROADCASTF32X2Z256rm:
870 case X86::VBROADCASTF32X2Zrm:
871 case X86::VBROADCASTI32X2Z128rm:
872 case X86::VBROADCASTI32X2Z256rm:
873 case X86::VBROADCASTI32X2Zrm:
874 case X86::VPBROADCASTWZ128rm:
875 case X86::VPBROADCASTWZ256rm:
876 case X86::VPBROADCASTWZrm:
877 case X86::VPBROADCASTDZ128rm:
878 case X86::VPBROADCASTDZ256rm:
879 case X86::VPBROADCASTDZrm:
880 case X86::VBROADCASTSSZ128rm:
881 case X86::VBROADCASTSSZ256rm:
882 case X86::VBROADCASTSSZrm:
883 case X86::VPBROADCASTQZ128rm:
884 case X86::VPBROADCASTQZ256rm:
885 case X86::VPBROADCASTQZrm:
886 case X86::VBROADCASTSDZ256rm:
887 case X86::VBROADCASTSDZrm:
888 case X86::VMOVSSZrm:
889 case X86::VMOVSSZrm_alt:
890 case X86::VMOVSDZrm:
891 case X86::VMOVSDZrm_alt:
892 case X86::VMOVSHZrm:
893 case X86::VMOVSHZrm_alt:
894 case X86::VMOVAPDZ128rm:
895 case X86::VMOVAPDZ256rm:
896 case X86::VMOVAPDZrm:
897 case X86::VMOVAPSZ128rm:
898 case X86::VMOVAPSZ256rm:
899 case X86::VMOVAPSZ128rm_NOVLX:
900 case X86::VMOVAPSZ256rm_NOVLX:
901 case X86::VMOVAPSZrm:
902 case X86::VMOVDQA32Z128rm:
903 case X86::VMOVDQA32Z256rm:
904 case X86::VMOVDQA32Zrm:
905 case X86::VMOVDQA64Z128rm:
906 case X86::VMOVDQA64Z256rm:
907 case X86::VMOVDQA64Zrm:
908 case X86::VMOVDQU16Z128rm:
909 case X86::VMOVDQU16Z256rm:
910 case X86::VMOVDQU16Zrm:
911 case X86::VMOVDQU32Z128rm:
912 case X86::VMOVDQU32Z256rm:
913 case X86::VMOVDQU32Zrm:
914 case X86::VMOVDQU64Z128rm:
915 case X86::VMOVDQU64Z256rm:
916 case X86::VMOVDQU64Zrm:
917 case X86::VMOVDQU8Z128rm:
918 case X86::VMOVDQU8Z256rm:
919 case X86::VMOVDQU8Zrm:
920 case X86::VMOVUPDZ128rm:
921 case X86::VMOVUPDZ256rm:
922 case X86::VMOVUPDZrm:
923 case X86::VMOVUPSZ128rm:
924 case X86::VMOVUPSZ256rm:
925 case X86::VMOVUPSZ128rm_NOVLX:
926 case X86::VMOVUPSZ256rm_NOVLX:
927 case X86::VMOVUPSZrm: {
928 // Loads from constant pools are trivially rematerializable.
929 if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
930 MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
931 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
932 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
933 MI.isDereferenceableInvariantLoad()) {
934 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
935 if (BaseReg == 0 || BaseReg == X86::RIP)
936 return true;
937 // Allow re-materialization of PIC load.
938 if (!(!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())) {
939 const MachineFunction &MF = *MI.getParent()->getParent();
940 const MachineRegisterInfo &MRI = MF.getRegInfo();
941 if (regIsPICBase(BaseReg, MRI))
942 return true;
943 }
944 }
945 break;
946 }
947
948 case X86::LEA32r:
949 case X86::LEA64r: {
950 if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
951 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
952 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
953 !MI.getOperand(1 + X86::AddrDisp).isReg()) {
954 // lea fi#, lea GV, etc. are all rematerializable.
955 if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
956 return true;
957 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
958 if (BaseReg == 0)
959 return true;
960 // Allow re-materialization of lea PICBase + x.
961 const MachineFunction &MF = *MI.getParent()->getParent();
962 const MachineRegisterInfo &MRI = MF.getRegInfo();
963 if (regIsPICBase(BaseReg, MRI))
964 return true;
965 }
966 break;
967 }
968 }
969 return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
970}
971
972void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
973 MachineBasicBlock::iterator I,
974 Register DestReg, unsigned SubIdx,
975 const MachineInstr &Orig,
976 const TargetRegisterInfo &TRI) const {
977 bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
978 if (ClobbersEFLAGS && MBB.computeRegisterLiveness(&TRI, X86::EFLAGS, I) !=
979 MachineBasicBlock::LQR_Dead) {
980 // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
981 // effects.
982 int Value;
983 switch (Orig.getOpcode()) {
984 case X86::MOV32r0:
985 Value = 0;
986 break;
987 case X86::MOV32r1:
988 Value = 1;
989 break;
990 case X86::MOV32r_1:
991 Value = -1;
992 break;
993 default:
994 llvm_unreachable("Unexpected instruction!");
995 }
996
997 const DebugLoc &DL = Orig.getDebugLoc();
998 BuildMI(MBB, I, DL, get(X86::MOV32ri))
999 .add(Orig.getOperand(0))
1000 .addImm(Value);
1001 } else {
1002 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1003 MBB.insert(I, MI);
1004 }
1005
1006 MachineInstr &NewMI = *std::prev(I);
1007 NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1008}
1009
1010/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
1011static bool hasLiveCondCodeDef(MachineInstr &MI) {
1012 for (const MachineOperand &MO : MI.operands()) {
1013 if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS &&
1014 !MO.isDead()) {
1015 return true;
1016 }
1017 }
1018 return false;
1019}
1020
1021/// Return the truncated shift count for a machine operand.
1022inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
1023 unsigned ShiftAmtOperandIdx) {
1024 // The shift count is six bits with the REX.W prefix and five bits without.
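// E.g. "shl $66, %rax" behaves like "shl $2, %rax", since 66 & 63 == 2.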
1025 unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
1026 unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
1027 return Imm & ShiftCountMask;
1028}
1029
1030/// Check whether the given shift count can be represented by the scale
1031/// field of a LEA instruction.
1032inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
1033 // Left shift instructions can be transformed into load-effective-address
1034 // instructions if we can encode them appropriately.
1035 // A LEA instruction utilizes a SIB byte to encode its scale factor.
1036 // The SIB.scale field is two bits wide which means that we can encode any
1037 // shift amount less than 4.
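// For example, "shl $3, %reg" can be expressed as "lea (,%reg,8), %dst"
// (scale 8), but a shift of 4 would need scale 16, which SIB cannot encode.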
1038 return ShAmt < 4 && ShAmt > 0;
1039}
1040
1041static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
1042 MachineInstr &CmpValDefInstr,
1043 const MachineRegisterInfo *MRI,
1044 MachineInstr **AndInstr,
1045 const TargetRegisterInfo *TRI,
1046 bool &NoSignFlag, bool &ClearsOverflowFlag) {
1047 if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG &&
1048 CmpInstr.getOpcode() == X86::TEST64rr) &&
1049 !(CmpValDefInstr.getOpcode() == X86::COPY &&
1050 CmpInstr.getOpcode() == X86::TEST16rr))
1051 return false;
1052
1053 // CmpInstr is a TEST16rr/TEST64rr instruction, and
1054 // `X86InstrInfo::analyzeCompare` guarantees that it's analyzable only if two
1055 // registers are identical.
1056 assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
1057 "CmpInstr is an analyzable TEST16rr/TEST64rr, and "
1058 "`X86InstrInfo::analyzeCompare` requires two reg operands are the"
1059 "same.");
1060
1061 // Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
1062 // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
1063 // if `CmpValDefInstr` sets the EFLAGS, it is likely that `CmpInstr` is
1064 // redundant.
1065 assert(
1066 (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
1067 "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
1068 "is a user of COPY sub16bit.");
1069 MachineInstr *VregDefInstr = nullptr;
1070 if (CmpInstr.getOpcode() == X86::TEST16rr) {
1071 if (!CmpValDefInstr.getOperand(1).getReg().isVirtual())
1072 return false;
1073 VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg());
1074 if (!VregDefInstr)
1075 return false;
1076 // We can only remove the TEST when the AND is an AND32ri or AND64ri32 whose
1077 // immediate fits in 16 bits; other 32/64-bit ops would test higher bits that
1078 // TEST16rr does not look at.
1079 if (!((VregDefInstr->getOpcode() == X86::AND32ri ||
1080 VregDefInstr->getOpcode() == X86::AND64ri32) &&
1081 isUInt<16>(VregDefInstr->getOperand(2).getImm())))
1082 return false;
1083 }
1084
1085 if (CmpInstr.getOpcode() == X86::TEST64rr) {
1086 // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
1087 // typically 0.
1088 if (CmpValDefInstr.getOperand(1).getImm() != 0)
1089 return false;
1090
1091 // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
1092 // sub_32bit or sub_xmm.
1093 if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
1094 return false;
1095
1096 VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
1097 }
1098
1099 assert(VregDefInstr && "Must have a definition (SSA)");
1100
1101 // Requires `CmpValDefInstr` and `VregDefInstr` are from the same MBB
1102 // to simplify the subsequent analysis.
1103 //
1104 // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
1105 // `CmpValDefInstr.getParent()`, this could be handled.
1106 if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
1107 return false;
1108
1109 if (X86::isAND(VregDefInstr->getOpcode())) {
1110 // Get a sequence of instructions like
1111 // %reg = and* ... // Set EFLAGS
1112 // ... // EFLAGS not changed
1113 // %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
1114 // test64rr %extended_reg, %extended_reg, implicit-def $eflags
1115 // or
1116 // %reg = and32* ...
1117 // ... // EFLAGS not changed.
1118 // %src_reg = copy %reg.sub_16bit:gr32
1119 // test16rr %src_reg, %src_reg, implicit-def $eflags
1120 //
1121 // If subsequent readers use a subset of bits that don't change
1122 // after `and*` instructions, it's likely that the test64rr could
1123 // be optimized away.
1124 for (const MachineInstr &Instr :
1125 make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
1126 MachineBasicBlock::iterator(CmpValDefInstr))) {
1127 // There are instructions between 'VregDefInstr' and
1128 // 'CmpValDefInstr' that modify EFLAGS.
1129 if (Instr.modifiesRegister(X86::EFLAGS, TRI))
1130 return false;
1131 }
1132
1133 *AndInstr = VregDefInstr;
1134
1135 // AND instruction will essentially update SF and clear OF, so
1136 // NoSignFlag should be false in the sense that SF is modified by `AND`.
1137 //
1138 // However, the implementation artificially sets `NoSignFlag` to true
1139 // to poison the SF bit; that is to say, if SF is looked at later, the
1140 // optimization (to erase TEST64rr) will be disabled.
1141 //
1142 // The reason to poison the SF bit is that its value could differ between
1143 // the `AND` and the `TEST` operation; the sign bit is not known for `AND`,
1144 // and is known to be 0 as a result of `TEST64rr`.
1145 //
1146 // FIXME: As opposed to poisoning the SF bit directly, consider peeking into
1147 // the AND instruction and using the static information to guide peephole
1148 // optimization if possible. For example, it's possible to fold a
1149 // conditional move into a copy if the relevant EFLAG bits could be deduced
1150 // from the immediate operand of the AND operation.
1151 //
1152 NoSignFlag = true;
1153 // ClearsOverflowFlag is true for AND operation (no surprise).
1154 ClearsOverflowFlag = true;
1155 return true;
1156 }
1157 return false;
1158}
1159
1160bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
1161 unsigned Opc, bool AllowSP, Register &NewSrc,
1162 bool &isKill, MachineOperand &ImplicitOp,
1163 LiveVariables *LV, LiveIntervals *LIS) const {
1164 MachineFunction &MF = *MI.getParent()->getParent();
1165 const TargetRegisterClass *RC;
1166 if (AllowSP) {
1167 RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
1168 } else {
1169 RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
1170 }
1171 Register SrcReg = Src.getReg();
1172 isKill = MI.killsRegister(SrcReg, /*TRI=*/nullptr);
1173
1174 // For both LEA64 and LEA32 the register already has essentially the right
1175 // type (32-bit or 64-bit); we may just need to forbid SP.
1176 if (Opc != X86::LEA64_32r) {
1177 NewSrc = SrcReg;
1178 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1179
1180 if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
1181 return false;
1182
1183 return true;
1184 }
1185
1186 // This is for an LEA64_32r, and the incoming registers are 32-bit. One way
1187 // or another, we need to add 64-bit registers to the final MI.
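// E.g. a physical 32-bit source %eax is widened to %rax (with %eax kept as an
// implicit operand); a virtual one gets a fresh 64-bit vreg whose sub_32bit
// lane is a COPY of the 32-bit value.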
1188 if (SrcReg.isPhysical()) {
1189 ImplicitOp = Src;
1190 ImplicitOp.setImplicit();
1191
1192 NewSrc = getX86SubSuperRegister(SrcReg, 64);
1193 assert(NewSrc.isValid() && "Invalid Operand");
1194 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1195 } else {
1196 // The source is a virtual register of the wrong class; we have to create a
1197 // temporary 64-bit vreg to feed into the LEA.
1198 NewSrc = MF.getRegInfo().createVirtualRegister(RC);
1199 MachineInstr *Copy =
1200 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1201 .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
1202 .addReg(SrcReg, getKillRegState(isKill));
1203
1204 // Which is obviously going to be dead after we're done with it.
1205 isKill = true;
1206
1207 if (LV)
1208 LV->replaceKillInstruction(SrcReg, MI, *Copy);
1209
1210 if (LIS) {
1211 SlotIndex CopyIdx = LIS->InsertMachineInstrInMaps(*Copy);
1212 SlotIndex Idx = LIS->getInstructionIndex(MI);
1213 LiveInterval &LI = LIS->getInterval(SrcReg);
1214 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
1215 if (S->end.getBaseIndex() == Idx)
1216 S->end = CopyIdx.getRegSlot();
1217 }
1218 }
1219
1220 // We've set all the parameters without issue.
1221 return true;
1222}
1223
1224MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
1225 MachineInstr &MI,
1226 LiveVariables *LV,
1227 LiveIntervals *LIS,
1228 bool Is8BitOp) const {
1229 // We handle 8-bit adds and various 16-bit opcodes in the switch below.
1230 MachineBasicBlock &MBB = *MI.getParent();
1231 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
1232 assert((Is8BitOp ||
1233 RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
1234 *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
1235 "Unexpected type for LEA transform");
1236
1237 // TODO: For a 32-bit target, we need to adjust the LEA variables with
1238 // something like this:
1239 // Opcode = X86::LEA32r;
1240 // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1241 // OutRegLEA =
1242 // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
1243 // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
1244 if (!Subtarget.is64Bit())
1245 return nullptr;
1246
1247 unsigned Opcode = X86::LEA64_32r;
1248 Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1249 Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
1250 Register InRegLEA2;
1251
1252 // Build and insert into an implicit UNDEF value. This is OK because
1253 // we will be shifting and then extracting the lower 8/16-bits.
1254 // This has the potential to cause a partial register stall, e.g.:
1255 // movw (%rbp,%rcx,2), %dx
1256 // leal -65(%rdx), %esi
1257 // But testing has shown this *does* help performance in 64-bit mode (at
1258 // least on modern x86 machines).
1259 MachineBasicBlock::iterator MBBI = MI.getIterator();
1260 Register Dest = MI.getOperand(0).getReg();
1261 Register Src = MI.getOperand(1).getReg();
1262 Register Src2;
1263 bool IsDead = MI.getOperand(0).isDead();
1264 bool IsKill = MI.getOperand(1).isKill();
1265 unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
1266 assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
1267 MachineInstr *ImpDef =
1268 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
1269 MachineInstr *InsMI =
1270 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1271 .addReg(InRegLEA, RegState::Define, SubReg)
1272 .addReg(Src, getKillRegState(IsKill));
1273 MachineInstr *ImpDef2 = nullptr;
1274 MachineInstr *InsMI2 = nullptr;
1275
1276 MachineInstrBuilder MIB =
1277 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
1278 switch (MIOpc) {
1279 default:
1280 llvm_unreachable("Unreachable!");
1281 case X86::SHL8ri:
1282 case X86::SHL16ri: {
1283 unsigned ShAmt = MI.getOperand(2).getImm();
1284 MIB.addReg(0)
1285 .addImm(1LL << ShAmt)
1286 .addReg(InRegLEA, RegState::Kill)
1287 .addImm(0)
1288 .addReg(0);
1289 break;
1290 }
1291 case X86::INC8r:
1292 case X86::INC16r:
1293 addRegOffset(MIB, InRegLEA, true, 1);
1294 break;
1295 case X86::DEC8r:
1296 case X86::DEC16r:
1297 addRegOffset(MIB, InRegLEA, true, -1);
1298 break;
1299 case X86::ADD8ri:
1300 case X86::ADD8ri_DB:
1301 case X86::ADD16ri:
1302 case X86::ADD16ri_DB:
1303 addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
1304 break;
1305 case X86::ADD8rr:
1306 case X86::ADD8rr_DB:
1307 case X86::ADD16rr:
1308 case X86::ADD16rr_DB: {
1309 Src2 = MI.getOperand(2).getReg();
1310 bool IsKill2 = MI.getOperand(2).isKill();
1311 assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
1312 if (Src == Src2) {
1313 // ADD8rr/ADD16rr killed %reg1028, %reg1028
1314 // just a single insert_subreg.
1315 addRegReg(MIB, InRegLEA, true, InRegLEA, false);
1316 } else {
1317 if (Subtarget.is64Bit())
1318 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1319 else
1320 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1321 // Build and insert into an implicit UNDEF value. This is OK because
1322 // we will be shifting and then extracting the lower 8/16-bits.
1323 ImpDef2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF),
1324 InRegLEA2);
1325 InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
1326 .addReg(InRegLEA2, RegState::Define, SubReg)
1327 .addReg(Src2, getKillRegState(IsKill2));
1328 addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
1329 }
1330 if (LV && IsKill2 && InsMI2)
1331 LV->replaceKillInstruction(Src2, MI, *InsMI2);
1332 break;
1333 }
1334 }
1335
1336 MachineInstr *NewMI = MIB;
1337 MachineInstr *ExtMI =
1338 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1339 .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
1340 .addReg(OutRegLEA, RegState::Kill, SubReg);
1341
1342 if (LV) {
1343 // Update live variables.
1344 LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
1345 if (InRegLEA2)
1346 LV->getVarInfo(InRegLEA2).Kills.push_back(NewMI);
1347 LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
1348 if (IsKill)
1349 LV->replaceKillInstruction(Src, MI, *InsMI);
1350 if (IsDead)
1351 LV->replaceKillInstruction(Dest, MI, *ExtMI);
1352 }
1353
1354 if (LIS) {
1355 LIS->InsertMachineInstrInMaps(*ImpDef);
1356 SlotIndex InsIdx = LIS->InsertMachineInstrInMaps(*InsMI);
1357 if (ImpDef2)
1358 LIS->InsertMachineInstrInMaps(*ImpDef2);
1359 SlotIndex Ins2Idx;
1360 if (InsMI2)
1361 Ins2Idx = LIS->InsertMachineInstrInMaps(*InsMI2);
1362 SlotIndex NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
1363 SlotIndex ExtIdx = LIS->InsertMachineInstrInMaps(*ExtMI);
1364 LIS->getInterval(InRegLEA);
1365 LIS->getInterval(OutRegLEA);
1366 if (InRegLEA2)
1367 LIS->getInterval(InRegLEA2);
1368
1369 // Move the use of Src up to InsMI.
1370 LiveInterval &SrcLI = LIS->getInterval(Src);
1371 LiveRange::Segment *SrcSeg = SrcLI.getSegmentContaining(NewIdx);
1372 if (SrcSeg->end == NewIdx.getRegSlot())
1373 SrcSeg->end = InsIdx.getRegSlot();
1374
1375 if (InsMI2) {
1376 // Move the use of Src2 up to InsMI2.
1377 LiveInterval &Src2LI = LIS->getInterval(Src2);
1378 LiveRange::Segment *Src2Seg = Src2LI.getSegmentContaining(NewIdx);
1379 if (Src2Seg->end == NewIdx.getRegSlot())
1380 Src2Seg->end = Ins2Idx.getRegSlot();
1381 }
1382
1383 // Move the definition of Dest down to ExtMI.
1384 LiveInterval &DestLI = LIS->getInterval(Dest);
1385 LiveRange::Segment *DestSeg =
1386 DestLI.getSegmentContaining(NewIdx.getRegSlot());
1387 assert(DestSeg->start == NewIdx.getRegSlot() &&
1388 DestSeg->valno->def == NewIdx.getRegSlot());
1389 DestSeg->start = ExtIdx.getRegSlot();
1390 DestSeg->valno->def = ExtIdx.getRegSlot();
1391 }
1392
1393 return ExtMI;
1394}
1395
1396/// This method must be implemented by targets that
1397/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
1398/// may be able to convert a two-address instruction into a true
1399/// three-address instruction on demand. This allows the X86 target (for
1400/// example) to convert ADD and SHL instructions into LEA instructions if they
1401/// would require register copies due to two-addressness.
1402///
1403/// This method returns a null pointer if the transformation cannot be
1404/// performed; otherwise it returns the new instruction.
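/// For example, a two-address "ADD32rr" requires its destination to also be
/// the first source; rewriting it as "LEA32r dst, [src1 + src2]" removes that
/// tied-operand constraint.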
1405///
1406MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
1407 LiveVariables *LV,
1408 LiveIntervals *LIS) const {
1409 // The following opcodes also set the condition code register(s). Only
1410 // convert them to an equivalent LEA if the condition code register defs
1411 // are dead!
1412 if (hasLiveCondCodeDef(MI))
1413 return nullptr;
1414
1415 MachineFunction &MF = *MI.getParent()->getParent();
1416 // All instructions input are two-addr instructions. Get the known operands.
1417 const MachineOperand &Dest = MI.getOperand(0);
1418 const MachineOperand &Src = MI.getOperand(1);
1419
1420 // Ideally, operations with undef should be folded before we get here, but we
1421 // can't guarantee it. Bail out because optimizing undefs is a waste of time.
1422 // Without this, we have to forward undef state to new register operands to
1423 // avoid machine verifier errors.
1424 if (Src.isUndef())
1425 return nullptr;
1426 if (MI.getNumOperands() > 2)
1427 if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
1428 return nullptr;
1429
1430 MachineInstr *NewMI = nullptr;
1431 Register SrcReg, SrcReg2;
1432 bool Is64Bit = Subtarget.is64Bit();
1433
1434 bool Is8BitOp = false;
1435 unsigned NumRegOperands = 2;
1436 unsigned MIOpc = MI.getOpcode();
1437 switch (MIOpc) {
1438 default:
1439 llvm_unreachable("Unreachable!");
1440 case X86::SHL64ri: {
1441 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1442 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1443 if (!isTruncatedShiftCountForLEA(ShAmt))
1444 return nullptr;
1445
1446 // LEA can't handle RSP.
1447 if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass(
1448 Src.getReg(), &X86::GR64_NOSPRegClass))
1449 return nullptr;
1450
1451 NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
1452 .add(Dest)
1453 .addReg(0)
1454 .addImm(1LL << ShAmt)
1455 .add(Src)
1456 .addImm(0)
1457 .addReg(0);
1458 break;
1459 }
1460 case X86::SHL32ri: {
1461 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1462 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1463 if (!isTruncatedShiftCountForLEA(ShAmt))
1464 return nullptr;
1465
1466 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1467
1468 // LEA can't handle ESP.
1469 bool isKill;
1470 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1471 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1472 ImplicitOp, LV, LIS))
1473 return nullptr;
1474
1475 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1476 .add(Dest)
1477 .addReg(0)
1478 .addImm(1LL << ShAmt)
1479 .addReg(SrcReg, getKillRegState(isKill))
1480 .addImm(0)
1481 .addReg(0);
1482 if (ImplicitOp.getReg() != 0)
1483 MIB.add(ImplicitOp);
1484 NewMI = MIB;
1485
1486 // Add kills if classifyLEAReg created a new register.
1487 if (LV && SrcReg != Src.getReg())
1488 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1489 break;
1490 }
1491 case X86::SHL8ri:
1492 Is8BitOp = true;
1493 [[fallthrough]];
1494 case X86::SHL16ri: {
1495 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1496 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1497 if (!isTruncatedShiftCountForLEA(ShAmt))
1498 return nullptr;
1499 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1500 }
1501 case X86::INC64r:
1502 case X86::INC32r: {
1503 assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
1504 unsigned Opc = MIOpc == X86::INC64r
1505 ? X86::LEA64r
1506 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1507 bool isKill;
1508 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1509 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1510 ImplicitOp, LV, LIS))
1511 return nullptr;
1512
1513 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1514 .add(Dest)
1515 .addReg(SrcReg, getKillRegState(isKill));
1516 if (ImplicitOp.getReg() != 0)
1517 MIB.add(ImplicitOp);
1518
1519 NewMI = addOffset(MIB, 1);
1520
1521 // Add kills if classifyLEAReg created a new register.
1522 if (LV && SrcReg != Src.getReg())
1523 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1524 break;
1525 }
1526 case X86::DEC64r:
1527 case X86::DEC32r: {
1528 assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
1529 unsigned Opc = MIOpc == X86::DEC64r
1530 ? X86::LEA64r
1531 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1532
1533 bool isKill;
1534 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1535 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1536 ImplicitOp, LV, LIS))
1537 return nullptr;
1538
1539 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1540 .add(Dest)
1541 .addReg(SrcReg, getKillRegState(isKill));
1542 if (ImplicitOp.getReg() != 0)
1543 MIB.add(ImplicitOp);
1544
1545 NewMI = addOffset(MIB, -1);
1546
1547 // Add kills if classifyLEAReg created a new register.
1548 if (LV && SrcReg != Src.getReg())
1549 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1550 break;
1551 }
1552 case X86::DEC8r:
1553 case X86::INC8r:
1554 Is8BitOp = true;
1555 [[fallthrough]];
1556 case X86::DEC16r:
1557 case X86::INC16r:
1558 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1559 case X86::ADD64rr:
1560 case X86::ADD64rr_DB:
1561 case X86::ADD32rr:
1562 case X86::ADD32rr_DB: {
1563 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1564 unsigned Opc;
1565 if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
1566 Opc = X86::LEA64r;
1567 else
1568 Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1569
1570 const MachineOperand &Src2 = MI.getOperand(2);
1571 bool isKill2;
1572 MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
1573 if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/false, SrcReg2, isKill2,
1574 ImplicitOp2, LV, LIS))
1575 return nullptr;
1576
1577 bool isKill;
1578 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1579 if (Src.getReg() == Src2.getReg()) {
1580 // Don't call classifyLEAReg a second time on the same register, in case
1581 // the first call inserted a COPY from Src2 and marked it as killed.
1582 isKill = isKill2;
1583 SrcReg = SrcReg2;
1584 } else {
1585 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1586 ImplicitOp, LV, LIS))
1587 return nullptr;
1588 }
1589
1590 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
1591 if (ImplicitOp.getReg() != 0)
1592 MIB.add(ImplicitOp);
1593 if (ImplicitOp2.getReg() != 0)
1594 MIB.add(ImplicitOp2);
1595
1596 NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
1597
1598 // Add kills if classifyLEAReg created a new register.
1599 if (LV) {
1600 if (SrcReg2 != Src2.getReg())
1601 LV->getVarInfo(SrcReg2).Kills.push_back(NewMI);
1602 if (SrcReg != SrcReg2 && SrcReg != Src.getReg())
1603 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1604 }
1605 NumRegOperands = 3;
1606 break;
1607 }
1608 case X86::ADD8rr:
1609 case X86::ADD8rr_DB:
1610 Is8BitOp = true;
1611 [[fallthrough]];
1612 case X86::ADD16rr:
1613 case X86::ADD16rr_DB:
1614 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1615 case X86::ADD64ri32:
1616 case X86::ADD64ri32_DB:
1617 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1618 NewMI = addOffset(
1619 BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
1620 MI.getOperand(2));
1621 break;
1622 case X86::ADD32ri:
1623 case X86::ADD32ri_DB: {
1624 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1625 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1626
1627 bool isKill;
1628 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1629 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1630 ImplicitOp, LV, LIS))
1631 return nullptr;
1632
1633 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1634 .add(Dest)
1635 .addReg(SrcReg, getKillRegState(isKill));
1636 if (ImplicitOp.getReg() != 0)
1637 MIB.add(ImplicitOp);
1638
1639 NewMI = addOffset(MIB, MI.getOperand(2));
1640
1641 // Add kills if classifyLEAReg created a new register.
1642 if (LV && SrcReg != Src.getReg())
1643 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1644 break;
1645 }
1646 case X86::ADD8ri:
1647 case X86::ADD8ri_DB:
1648 Is8BitOp = true;
1649 [[fallthrough]];
1650 case X86::ADD16ri:
1651 case X86::ADD16ri_DB:
1652 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1653 case X86::SUB8ri:
1654 case X86::SUB16ri:
1655 /// FIXME: Support these similarly to ADD8ri/ADD16ri*.
1656 return nullptr;
1657 case X86::SUB32ri: {
1658 if (!MI.getOperand(2).isImm())
1659 return nullptr;
1660 int64_t Imm = MI.getOperand(2).getImm();
1661 if (!isInt<32>(-Imm))
1662 return nullptr;
1663
1664 assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
1665 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1666
1667 bool isKill;
1668 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1669 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1670 ImplicitOp, LV, LIS))
1671 return nullptr;
1672
1673 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1674 .add(Dest)
1675 .addReg(SrcReg, getKillRegState(isKill));
1676 if (ImplicitOp.getReg() != 0)
1677 MIB.add(ImplicitOp);
1678
1679 NewMI = addOffset(MIB, -Imm);
1680
1681 // Add kills if classifyLEAReg created a new register.
1682 if (LV && SrcReg != Src.getReg())
1683 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1684 break;
1685 }
1686
1687 case X86::SUB64ri32: {
1688 if (!MI.getOperand(2).isImm())
1689 return nullptr;
1690 int64_t Imm = MI.getOperand(2).getImm();
1691 if (!isInt<32>(-Imm))
1692 return nullptr;
1693
1694 assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
1695
1696 MachineInstrBuilder MIB =
1697 BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src);
1698 NewMI = addOffset(MIB, -Imm);
1699 break;
1700 }
1701
1702 case X86::VMOVDQU8Z128rmk:
1703 case X86::VMOVDQU8Z256rmk:
1704 case X86::VMOVDQU8Zrmk:
1705 case X86::VMOVDQU16Z128rmk:
1706 case X86::VMOVDQU16Z256rmk:
1707 case X86::VMOVDQU16Zrmk:
1708 case X86::VMOVDQU32Z128rmk:
1709 case X86::VMOVDQA32Z128rmk:
1710 case X86::VMOVDQU32Z256rmk:
1711 case X86::VMOVDQA32Z256rmk:
1712 case X86::VMOVDQU32Zrmk:
1713 case X86::VMOVDQA32Zrmk:
1714 case X86::VMOVDQU64Z128rmk:
1715 case X86::VMOVDQA64Z128rmk:
1716 case X86::VMOVDQU64Z256rmk:
1717 case X86::VMOVDQA64Z256rmk:
1718 case X86::VMOVDQU64Zrmk:
1719 case X86::VMOVDQA64Zrmk:
1720 case X86::VMOVUPDZ128rmk:
1721 case X86::VMOVAPDZ128rmk:
1722 case X86::VMOVUPDZ256rmk:
1723 case X86::VMOVAPDZ256rmk:
1724 case X86::VMOVUPDZrmk:
1725 case X86::VMOVAPDZrmk:
1726 case X86::VMOVUPSZ128rmk:
1727 case X86::VMOVAPSZ128rmk:
1728 case X86::VMOVUPSZ256rmk:
1729 case X86::VMOVAPSZ256rmk:
1730 case X86::VMOVUPSZrmk:
1731 case X86::VMOVAPSZrmk:
1732 case X86::VBROADCASTSDZ256rmk:
1733 case X86::VBROADCASTSDZrmk:
1734 case X86::VBROADCASTSSZ128rmk:
1735 case X86::VBROADCASTSSZ256rmk:
1736 case X86::VBROADCASTSSZrmk:
1737 case X86::VPBROADCASTDZ128rmk:
1738 case X86::VPBROADCASTDZ256rmk:
1739 case X86::VPBROADCASTDZrmk:
1740 case X86::VPBROADCASTQZ128rmk:
1741 case X86::VPBROADCASTQZ256rmk:
1742 case X86::VPBROADCASTQZrmk: {
1743 unsigned Opc;
1744 switch (MIOpc) {
1745 default:
1746 llvm_unreachable("Unreachable!");
1747 case X86::VMOVDQU8Z128rmk:
1748 Opc = X86::VPBLENDMBZ128rmk;
1749 break;
1750 case X86::VMOVDQU8Z256rmk:
1751 Opc = X86::VPBLENDMBZ256rmk;
1752 break;
1753 case X86::VMOVDQU8Zrmk:
1754 Opc = X86::VPBLENDMBZrmk;
1755 break;
1756 case X86::VMOVDQU16Z128rmk:
1757 Opc = X86::VPBLENDMWZ128rmk;
1758 break;
1759 case X86::VMOVDQU16Z256rmk:
1760 Opc = X86::VPBLENDMWZ256rmk;
1761 break;
1762 case X86::VMOVDQU16Zrmk:
1763 Opc = X86::VPBLENDMWZrmk;
1764 break;
1765 case X86::VMOVDQU32Z128rmk:
1766 Opc = X86::VPBLENDMDZ128rmk;
1767 break;
1768 case X86::VMOVDQU32Z256rmk:
1769 Opc = X86::VPBLENDMDZ256rmk;
1770 break;
1771 case X86::VMOVDQU32Zrmk:
1772 Opc = X86::VPBLENDMDZrmk;
1773 break;
1774 case X86::VMOVDQU64Z128rmk:
1775 Opc = X86::VPBLENDMQZ128rmk;
1776 break;
1777 case X86::VMOVDQU64Z256rmk:
1778 Opc = X86::VPBLENDMQZ256rmk;
1779 break;
1780 case X86::VMOVDQU64Zrmk:
1781 Opc = X86::VPBLENDMQZrmk;
1782 break;
1783 case X86::VMOVUPDZ128rmk:
1784 Opc = X86::VBLENDMPDZ128rmk;
1785 break;
1786 case X86::VMOVUPDZ256rmk:
1787 Opc = X86::VBLENDMPDZ256rmk;
1788 break;
1789 case X86::VMOVUPDZrmk:
1790 Opc = X86::VBLENDMPDZrmk;
1791 break;
1792 case X86::VMOVUPSZ128rmk:
1793 Opc = X86::VBLENDMPSZ128rmk;
1794 break;
1795 case X86::VMOVUPSZ256rmk:
1796 Opc = X86::VBLENDMPSZ256rmk;
1797 break;
1798 case X86::VMOVUPSZrmk:
1799 Opc = X86::VBLENDMPSZrmk;
1800 break;
1801 case X86::VMOVDQA32Z128rmk:
1802 Opc = X86::VPBLENDMDZ128rmk;
1803 break;
1804 case X86::VMOVDQA32Z256rmk:
1805 Opc = X86::VPBLENDMDZ256rmk;
1806 break;
1807 case X86::VMOVDQA32Zrmk:
1808 Opc = X86::VPBLENDMDZrmk;
1809 break;
1810 case X86::VMOVDQA64Z128rmk:
1811 Opc = X86::VPBLENDMQZ128rmk;
1812 break;
1813 case X86::VMOVDQA64Z256rmk:
1814 Opc = X86::VPBLENDMQZ256rmk;
1815 break;
1816 case X86::VMOVDQA64Zrmk:
1817 Opc = X86::VPBLENDMQZrmk;
1818 break;
1819 case X86::VMOVAPDZ128rmk:
1820 Opc = X86::VBLENDMPDZ128rmk;
1821 break;
1822 case X86::VMOVAPDZ256rmk:
1823 Opc = X86::VBLENDMPDZ256rmk;
1824 break;
1825 case X86::VMOVAPDZrmk:
1826 Opc = X86::VBLENDMPDZrmk;
1827 break;
1828 case X86::VMOVAPSZ128rmk:
1829 Opc = X86::VBLENDMPSZ128rmk;
1830 break;
1831 case X86::VMOVAPSZ256rmk:
1832 Opc = X86::VBLENDMPSZ256rmk;
1833 break;
1834 case X86::VMOVAPSZrmk:
1835 Opc = X86::VBLENDMPSZrmk;
1836 break;
1837 case X86::VBROADCASTSDZ256rmk:
1838 Opc = X86::VBLENDMPDZ256rmbk;
1839 break;
1840 case X86::VBROADCASTSDZrmk:
1841 Opc = X86::VBLENDMPDZrmbk;
1842 break;
1843 case X86::VBROADCASTSSZ128rmk:
1844 Opc = X86::VBLENDMPSZ128rmbk;
1845 break;
1846 case X86::VBROADCASTSSZ256rmk:
1847 Opc = X86::VBLENDMPSZ256rmbk;
1848 break;
1849 case X86::VBROADCASTSSZrmk:
1850 Opc = X86::VBLENDMPSZrmbk;
1851 break;
1852 case X86::VPBROADCASTDZ128rmk:
1853 Opc = X86::VPBLENDMDZ128rmbk;
1854 break;
1855 case X86::VPBROADCASTDZ256rmk:
1856 Opc = X86::VPBLENDMDZ256rmbk;
1857 break;
1858 case X86::VPBROADCASTDZrmk:
1859 Opc = X86::VPBLENDMDZrmbk;
1860 break;
1861 case X86::VPBROADCASTQZ128rmk:
1862 Opc = X86::VPBLENDMQZ128rmbk;
1863 break;
1864 case X86::VPBROADCASTQZ256rmk:
1865 Opc = X86::VPBLENDMQZ256rmbk;
1866 break;
1867 case X86::VPBROADCASTQZrmk:
1868 Opc = X86::VPBLENDMQZrmbk;
1869 break;
1870 }
1871
1872 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1873 .add(Dest)
1874 .add(MI.getOperand(2))
1875 .add(Src)
1876 .add(MI.getOperand(3))
1877 .add(MI.getOperand(4))
1878 .add(MI.getOperand(5))
1879 .add(MI.getOperand(6))
1880 .add(MI.getOperand(7));
1881 NumRegOperands = 4;
1882 break;
1883 }
1884
1885 case X86::VMOVDQU8Z128rrk:
1886 case X86::VMOVDQU8Z256rrk:
1887 case X86::VMOVDQU8Zrrk:
1888 case X86::VMOVDQU16Z128rrk:
1889 case X86::VMOVDQU16Z256rrk:
1890 case X86::VMOVDQU16Zrrk:
1891 case X86::VMOVDQU32Z128rrk:
1892 case X86::VMOVDQA32Z128rrk:
1893 case X86::VMOVDQU32Z256rrk:
1894 case X86::VMOVDQA32Z256rrk:
1895 case X86::VMOVDQU32Zrrk:
1896 case X86::VMOVDQA32Zrrk:
1897 case X86::VMOVDQU64Z128rrk:
1898 case X86::VMOVDQA64Z128rrk:
1899 case X86::VMOVDQU64Z256rrk:
1900 case X86::VMOVDQA64Z256rrk:
1901 case X86::VMOVDQU64Zrrk:
1902 case X86::VMOVDQA64Zrrk:
1903 case X86::VMOVUPDZ128rrk:
1904 case X86::VMOVAPDZ128rrk:
1905 case X86::VMOVUPDZ256rrk:
1906 case X86::VMOVAPDZ256rrk:
1907 case X86::VMOVUPDZrrk:
1908 case X86::VMOVAPDZrrk:
1909 case X86::VMOVUPSZ128rrk:
1910 case X86::VMOVAPSZ128rrk:
1911 case X86::VMOVUPSZ256rrk:
1912 case X86::VMOVAPSZ256rrk:
1913 case X86::VMOVUPSZrrk:
1914 case X86::VMOVAPSZrrk: {
1915 unsigned Opc;
1916 switch (MIOpc) {
1917 default:
1918 llvm_unreachable("Unreachable!");
1919 case X86::VMOVDQU8Z128rrk:
1920 Opc = X86::VPBLENDMBZ128rrk;
1921 break;
1922 case X86::VMOVDQU8Z256rrk:
1923 Opc = X86::VPBLENDMBZ256rrk;
1924 break;
1925 case X86::VMOVDQU8Zrrk:
1926 Opc = X86::VPBLENDMBZrrk;
1927 break;
1928 case X86::VMOVDQU16Z128rrk:
1929 Opc = X86::VPBLENDMWZ128rrk;
1930 break;
1931 case X86::VMOVDQU16Z256rrk:
1932 Opc = X86::VPBLENDMWZ256rrk;
1933 break;
1934 case X86::VMOVDQU16Zrrk:
1935 Opc = X86::VPBLENDMWZrrk;
1936 break;
1937 case X86::VMOVDQU32Z128rrk:
1938 Opc = X86::VPBLENDMDZ128rrk;
1939 break;
1940 case X86::VMOVDQU32Z256rrk:
1941 Opc = X86::VPBLENDMDZ256rrk;
1942 break;
1943 case X86::VMOVDQU32Zrrk:
1944 Opc = X86::VPBLENDMDZrrk;
1945 break;
1946 case X86::VMOVDQU64Z128rrk:
1947 Opc = X86::VPBLENDMQZ128rrk;
1948 break;
1949 case X86::VMOVDQU64Z256rrk:
1950 Opc = X86::VPBLENDMQZ256rrk;
1951 break;
1952 case X86::VMOVDQU64Zrrk:
1953 Opc = X86::VPBLENDMQZrrk;
1954 break;
1955 case X86::VMOVUPDZ128rrk:
1956 Opc = X86::VBLENDMPDZ128rrk;
1957 break;
1958 case X86::VMOVUPDZ256rrk:
1959 Opc = X86::VBLENDMPDZ256rrk;
1960 break;
1961 case X86::VMOVUPDZrrk:
1962 Opc = X86::VBLENDMPDZrrk;
1963 break;
1964 case X86::VMOVUPSZ128rrk:
1965 Opc = X86::VBLENDMPSZ128rrk;
1966 break;
1967 case X86::VMOVUPSZ256rrk:
1968 Opc = X86::VBLENDMPSZ256rrk;
1969 break;
1970 case X86::VMOVUPSZrrk:
1971 Opc = X86::VBLENDMPSZrrk;
1972 break;
1973 case X86::VMOVDQA32Z128rrk:
1974 Opc = X86::VPBLENDMDZ128rrk;
1975 break;
1976 case X86::VMOVDQA32Z256rrk:
1977 Opc = X86::VPBLENDMDZ256rrk;
1978 break;
1979 case X86::VMOVDQA32Zrrk:
1980 Opc = X86::VPBLENDMDZrrk;
1981 break;
1982 case X86::VMOVDQA64Z128rrk:
1983 Opc = X86::VPBLENDMQZ128rrk;
1984 break;
1985 case X86::VMOVDQA64Z256rrk:
1986 Opc = X86::VPBLENDMQZ256rrk;
1987 break;
1988 case X86::VMOVDQA64Zrrk:
1989 Opc = X86::VPBLENDMQZrrk;
1990 break;
1991 case X86::VMOVAPDZ128rrk:
1992 Opc = X86::VBLENDMPDZ128rrk;
1993 break;
1994 case X86::VMOVAPDZ256rrk:
1995 Opc = X86::VBLENDMPDZ256rrk;
1996 break;
1997 case X86::VMOVAPDZrrk:
1998 Opc = X86::VBLENDMPDZrrk;
1999 break;
2000 case X86::VMOVAPSZ128rrk:
2001 Opc = X86::VBLENDMPSZ128rrk;
2002 break;
2003 case X86::VMOVAPSZ256rrk:
2004 Opc = X86::VBLENDMPSZ256rrk;
2005 break;
2006 case X86::VMOVAPSZrrk:
2007 Opc = X86::VBLENDMPSZrrk;
2008 break;
2009 }
2010
2011 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
2012 .add(Dest)
2013 .add(MI.getOperand(2))
2014 .add(Src)
2015 .add(MI.getOperand(3));
2016 NumRegOperands = 4;
2017 break;
2018 }
2019 }
2020
2021 if (!NewMI)
2022 return nullptr;
2023
2024 if (LV) { // Update live variables
2025 for (unsigned I = 0; I < NumRegOperands; ++I) {
2026 MachineOperand &Op = MI.getOperand(I);
2027 if (Op.isReg() && (Op.isDead() || Op.isKill()))
2028 LV->replaceKillInstruction(Op.getReg(), MI, *NewMI);
2029 }
2030 }
2031
2032 MachineBasicBlock &MBB = *MI.getParent();
2033 MBB.insert(MI.getIterator(), NewMI); // Insert the new inst
2034
2035 if (LIS) {
2036 LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
2037 if (SrcReg)
2038 LIS->getInterval(SrcReg);
2039 if (SrcReg2)
2040 LIS->getInterval(SrcReg2);
2041 }
2042
2043 return NewMI;
2044}
2045
2046/// This determines which of the three possible cases of a three-source commute
2047/// the source indexes correspond to, taking into account any mask operands.
2048/// Commuting the passthru operand is never allowed; an unsupported pair of
2049/// indexes is a programming error.
2050/// Case 0 - Possible to commute the first and second operands.
2051/// Case 1 - Possible to commute the first and third operands.
2052/// Case 2 - Possible to commute the second and third operands.
2053static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
2054 unsigned SrcOpIdx2) {
2055 // Put the lowest index to SrcOpIdx1 to simplify the checks below.
2056 if (SrcOpIdx1 > SrcOpIdx2)
2057 std::swap(SrcOpIdx1, SrcOpIdx2);
2058
2059 unsigned Op1 = 1, Op2 = 2, Op3 = 3;
2060 if (X86II::isKMasked(TSFlags)) {
2061 Op2++;
2062 Op3++;
2063 }
2064
2065 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
2066 return 0;
2067 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
2068 return 1;
2069 if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
2070 return 2;
2071 llvm_unreachable("Unknown three src commute case.");
2072}
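// For illustration: for an unmasked three-source instruction the vector
// sources sit at operand indexes 1, 2 and 3, so a request to commute operands
// (1, 3) is classified as Case 1. For a k-masked instruction the mask occupies
// index 2, shifting the second and third sources to indexes 3 and 4, so the
// same Case 1 corresponds to the index pair (1, 4).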
2073
2074unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
2075 const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
2076 const X86InstrFMA3Group &FMA3Group) const {
2077
2078 unsigned Opc = MI.getOpcode();
2079
2080 // TODO: Commuting the 1st operand of FMA*_Int requires some additional
2081 // analysis. The commute optimization is legal only if all users of FMA*_Int
2082 // use only the lowest element of the FMA*_Int instruction. Such analysis is
2083 // not implemented yet, so commuting operand 1 is simply never requested for
2084 // the intrinsic forms. When such analysis is available, this will be the
2085 // right place to call it.
2086 assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
2087 "Intrinsic instructions can't commute operand 1");
2088
2089 // Determine which case this commute is or if it can't be done.
2090 unsigned Case =
2091 getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
2092 assert(Case < 3 && "Unexpected case number!");
2093
2094 // Define the FMA forms mapping array that helps to map input FMA form
2095 // to output FMA form to preserve the operation semantics after
2096 // commuting the operands.
2097 const unsigned Form132Index = 0;
2098 const unsigned Form213Index = 1;
2099 const unsigned Form231Index = 2;
2100 static const unsigned FormMapping[][3] = {
2101 // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
2102 // FMA132 A, C, b; ==> FMA231 C, A, b;
2103 // FMA213 B, A, c; ==> FMA213 A, B, c;
2104 // FMA231 C, A, b; ==> FMA132 A, C, b;
2105 {Form231Index, Form213Index, Form132Index},
2106 // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
2107 // FMA132 A, c, B; ==> FMA132 B, c, A;
2108 // FMA213 B, a, C; ==> FMA231 C, a, B;
2109 // FMA231 C, a, B; ==> FMA213 B, a, C;
2110 {Form132Index, Form231Index, Form213Index},
2111 // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
2112 // FMA132 a, C, B; ==> FMA213 a, B, C;
2113 // FMA213 b, A, C; ==> FMA132 b, C, A;
2114 // FMA231 c, A, B; ==> FMA231 c, B, A;
2115 {Form213Index, Form132Index, Form231Index}};
2116
2117 unsigned FMAForms[3];
2118 FMAForms[0] = FMA3Group.get132Opcode();
2119 FMAForms[1] = FMA3Group.get213Opcode();
2120 FMAForms[2] = FMA3Group.get231Opcode();
2121
2122 // Everything is ready, just adjust the FMA opcode and return it.
2123 for (unsigned FormIndex = 0; FormIndex < 3; FormIndex++)
2124 if (Opc == FMAForms[FormIndex])
2125 return FMAForms[FormMapping[Case][FormIndex]];
2126
2127 llvm_unreachable("Illegal FMA3 format");
2128}
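// Worked example: commuting source operands 2 and 3 of an unmasked FMA is
// Case 2, whose mapping row is {Form213Index, Form132Index, Form231Index}.
// With a 213-form input opcode (FormIndex == 1) the result is the 132 form:
// FMA213 b, A, C computes A*b + C, and after exchanging A and C the same value
// is produced by FMA132 b, C, A, matching the table comment above.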
2129
2130static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
2131 unsigned SrcOpIdx2) {
2132 // Determine which case this commute is or if it can't be done.
2133 unsigned Case =
2134 getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
2135 assert(Case < 3 && "Unexpected case value!");
2136
2137 // For each case we need to swap two pairs of bits in the final immediate.
2138 static const uint8_t SwapMasks[3][4] = {
2139 {0x04, 0x10, 0x08, 0x20}, // Swap bits 2/4 and 3/5.
2140 {0x02, 0x10, 0x08, 0x40}, // Swap bits 1/4 and 3/6.
2141 {0x02, 0x04, 0x20, 0x40}, // Swap bits 1/2 and 5/6.
2142 };
2143
2144 uint8_t Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
2145 // Clear out the bits we are swapping.
2146 uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
2147 SwapMasks[Case][2] | SwapMasks[Case][3]);
2148 // If the immediate had a bit of the pair set, then set the opposite bit.
2149 if (Imm & SwapMasks[Case][0])
2150 NewImm |= SwapMasks[Case][1];
2151 if (Imm & SwapMasks[Case][1])
2152 NewImm |= SwapMasks[Case][0];
2153 if (Imm & SwapMasks[Case][2])
2154 NewImm |= SwapMasks[Case][3];
2155 if (Imm & SwapMasks[Case][3])
2156 NewImm |= SwapMasks[Case][2];
2157 MI.getOperand(MI.getNumOperands() - 1).setImm(NewImm);
2158}
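// Worked example: the ternlog immediate 0xCA encodes "src1 ? src2 : src3".
// Commuting the first and second sources is Case 0, which swaps immediate bits
// 2<->4 and 3<->5. 0xCA (0b11001010) has bits 1, 3, 6 and 7 set; only bit 3
// moves (to bit 5), giving 0b11100010 = 0xE2, the immediate for
// "src2 ? src1 : src3", i.e. the same select with its first two inputs swapped.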
2159
2160// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
2161// commuted.
2162static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
2163#define VPERM_CASES(Suffix) \
2164 case X86::VPERMI2##Suffix##Z128rr: \
2165 case X86::VPERMT2##Suffix##Z128rr: \
2166 case X86::VPERMI2##Suffix##Z256rr: \
2167 case X86::VPERMT2##Suffix##Z256rr: \
2168 case X86::VPERMI2##Suffix##Zrr: \
2169 case X86::VPERMT2##Suffix##Zrr: \
2170 case X86::VPERMI2##Suffix##Z128rm: \
2171 case X86::VPERMT2##Suffix##Z128rm: \
2172 case X86::VPERMI2##Suffix##Z256rm: \
2173 case X86::VPERMT2##Suffix##Z256rm: \
2174 case X86::VPERMI2##Suffix##Zrm: \
2175 case X86::VPERMT2##Suffix##Zrm: \
2176 case X86::VPERMI2##Suffix##Z128rrkz: \
2177 case X86::VPERMT2##Suffix##Z128rrkz: \
2178 case X86::VPERMI2##Suffix##Z256rrkz: \
2179 case X86::VPERMT2##Suffix##Z256rrkz: \
2180 case X86::VPERMI2##Suffix##Zrrkz: \
2181 case X86::VPERMT2##Suffix##Zrrkz: \
2182 case X86::VPERMI2##Suffix##Z128rmkz: \
2183 case X86::VPERMT2##Suffix##Z128rmkz: \
2184 case X86::VPERMI2##Suffix##Z256rmkz: \
2185 case X86::VPERMT2##Suffix##Z256rmkz: \
2186 case X86::VPERMI2##Suffix##Zrmkz: \
2187 case X86::VPERMT2##Suffix##Zrmkz:
2188
2189#define VPERM_CASES_BROADCAST(Suffix) \
2190 VPERM_CASES(Suffix) \
2191 case X86::VPERMI2##Suffix##Z128rmb: \
2192 case X86::VPERMT2##Suffix##Z128rmb: \
2193 case X86::VPERMI2##Suffix##Z256rmb: \
2194 case X86::VPERMT2##Suffix##Z256rmb: \
2195 case X86::VPERMI2##Suffix##Zrmb: \
2196 case X86::VPERMT2##Suffix##Zrmb: \
2197 case X86::VPERMI2##Suffix##Z128rmbkz: \
2198 case X86::VPERMT2##Suffix##Z128rmbkz: \
2199 case X86::VPERMI2##Suffix##Z256rmbkz: \
2200 case X86::VPERMT2##Suffix##Z256rmbkz: \
2201 case X86::VPERMI2##Suffix##Zrmbkz: \
2202 case X86::VPERMT2##Suffix##Zrmbkz:
2203
2204 switch (Opcode) {
2205 default:
2206 return false;
2207 VPERM_CASES(B)
2208 VPERM_CASES_BROADCAST(D)
2209 VPERM_CASES_BROADCAST(PD)
2210 VPERM_CASES_BROADCAST(PS)
2211 VPERM_CASES_BROADCAST(Q)
2212 VPERM_CASES(W)
2213 return true;
2214 }
2215#undef VPERM_CASES_BROADCAST
2216#undef VPERM_CASES
2217}
2218
2219// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
2220// from the I opcode to the T opcode and vice versa.
2221static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
2222#define VPERM_CASES(Orig, New) \
2223 case X86::Orig##Z128rr: \
2224 return X86::New##Z128rr; \
2225 case X86::Orig##Z128rrkz: \
2226 return X86::New##Z128rrkz; \
2227 case X86::Orig##Z128rm: \
2228 return X86::New##Z128rm; \
2229 case X86::Orig##Z128rmkz: \
2230 return X86::New##Z128rmkz; \
2231 case X86::Orig##Z256rr: \
2232 return X86::New##Z256rr; \
2233 case X86::Orig##Z256rrkz: \
2234 return X86::New##Z256rrkz; \
2235 case X86::Orig##Z256rm: \
2236 return X86::New##Z256rm; \
2237 case X86::Orig##Z256rmkz: \
2238 return X86::New##Z256rmkz; \
2239 case X86::Orig##Zrr: \
2240 return X86::New##Zrr; \
2241 case X86::Orig##Zrrkz: \
2242 return X86::New##Zrrkz; \
2243 case X86::Orig##Zrm: \
2244 return X86::New##Zrm; \
2245 case X86::Orig##Zrmkz: \
2246 return X86::New##Zrmkz;
2247
2248#define VPERM_CASES_BROADCAST(Orig, New) \
2249 VPERM_CASES(Orig, New) \
2250 case X86::Orig##Z128rmb: \
2251 return X86::New##Z128rmb; \
2252 case X86::Orig##Z128rmbkz: \
2253 return X86::New##Z128rmbkz; \
2254 case X86::Orig##Z256rmb: \
2255 return X86::New##Z256rmb; \
2256 case X86::Orig##Z256rmbkz: \
2257 return X86::New##Z256rmbkz; \
2258 case X86::Orig##Zrmb: \
2259 return X86::New##Zrmb; \
2260 case X86::Orig##Zrmbkz: \
2261 return X86::New##Zrmbkz;
2262
2263 switch (Opcode) {
2264 VPERM_CASES(VPERMI2B, VPERMT2B)
2265 VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
2266 VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
2267 VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
2268 VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
2269 VPERM_CASES(VPERMI2W, VPERMT2W)
2270 VPERM_CASES(VPERMT2B, VPERMI2B)
2271 VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
2272 VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
2273 VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
2274 VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
2275 VPERM_CASES(VPERMT2W, VPERMI2W)
2276 }
2277
2278 llvm_unreachable("Unreachable!");
2279#undef VPERM_CASES_BROADCAST
2280#undef VPERM_CASES
2281}
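// For illustration: the commute of a VPERMI2/VPERMT2 register pair is expressed
// purely as an opcode change, e.g. getCommutedVPERMV3Opcode(X86::VPERMI2DZrr)
// returns X86::VPERMT2DZrr and vice versa; the broadcast (rmb*) memory forms
// map the same way through VPERM_CASES_BROADCAST.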
2282
2283MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2284 unsigned OpIdx1,
2285 unsigned OpIdx2) const {
2286 auto CloneIfNew = [&](MachineInstr &MI) {
2287 return std::exchange(NewMI, false)
2288 ? MI.getParent()->getParent()->CloneMachineInstr(&MI)
2289 : &MI;
2290 };
2291 MachineInstr *WorkingMI = nullptr;
2292 unsigned Opc = MI.getOpcode();
2293
2294#define CASE_ND(OP) \
2295 case X86::OP: \
2296 case X86::OP##_ND:
2297
2298 switch (Opc) {
2299 // SHLD B, C, I <-> SHRD C, B, (BitWidth - I)
2300 CASE_ND(SHRD16rri8)
2301 CASE_ND(SHLD16rri8)
2302 CASE_ND(SHRD32rri8)
2303 CASE_ND(SHLD32rri8)
2304 CASE_ND(SHRD64rri8)
2305 CASE_ND(SHLD64rri8) {
2306 unsigned Size;
2307 switch (Opc) {
2308 default:
2309 llvm_unreachable("Unreachable!");
2310#define FROM_TO_SIZE(A, B, S) \
2311 case X86::A: \
2312 Opc = X86::B; \
2313 Size = S; \
2314 break; \
2315 case X86::A##_ND: \
2316 Opc = X86::B##_ND; \
2317 Size = S; \
2318 break; \
2319 case X86::B: \
2320 Opc = X86::A; \
2321 Size = S; \
2322 break; \
2323 case X86::B##_ND: \
2324 Opc = X86::A##_ND; \
2325 Size = S; \
2326 break;
2327
2328 FROM_TO_SIZE(SHRD16rri8, SHLD16rri8, 16)
2329 FROM_TO_SIZE(SHRD32rri8, SHLD32rri8, 32)
2330 FROM_TO_SIZE(SHRD64rri8, SHLD64rri8, 64)
2331#undef FROM_TO_SIZE
2332 }
2333 WorkingMI = CloneIfNew(MI);
2334 WorkingMI->setDesc(get(Opc));
2335 WorkingMI->getOperand(3).setImm(Size - MI.getOperand(3).getImm());
2336 break;
2337 }
2338 case X86::PFSUBrr:
2339 case X86::PFSUBRrr:
2340 // PFSUB x, y: x = x - y
2341 // PFSUBR x, y: x = y - x
2342 WorkingMI = CloneIfNew(MI);
2343 WorkingMI->setDesc(
2344 get(X86::PFSUBRrr == Opc ? X86::PFSUBrr : X86::PFSUBRrr));
2345 break;
2346 case X86::BLENDPDrri:
2347 case X86::BLENDPSrri:
2348 case X86::VBLENDPDrri:
2349 case X86::VBLENDPSrri:
2350 // If we're optimizing for size, try to use MOVSD/MOVSS.
2351 if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
2352 unsigned Mask = (Opc == X86::BLENDPDrri || Opc == X86::VBLENDPDrri) ? 0x03 : 0x0F;
2353 if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
2354#define FROM_TO(FROM, TO) \
2355 case X86::FROM: \
2356 Opc = X86::TO; \
2357 break;
2358 switch (Opc) {
2359 default:
2360 llvm_unreachable("Unreachable!");
2361 FROM_TO(BLENDPDrri, MOVSDrr)
2362 FROM_TO(BLENDPSrri, MOVSSrr)
2363 FROM_TO(VBLENDPDrri, VMOVSDrr)
2364 FROM_TO(VBLENDPSrri, VMOVSSrr)
2365 }
2366 WorkingMI = CloneIfNew(MI);
2367 WorkingMI->setDesc(get(Opc));
2368 WorkingMI->removeOperand(3);
2369 break;
2370 }
2371#undef FROM_TO
2372 }
2373 [[fallthrough]];
2374 case X86::PBLENDWrri:
2375 case X86::VBLENDPDYrri:
2376 case X86::VBLENDPSYrri:
2377 case X86::VPBLENDDrri:
2378 case X86::VPBLENDWrri:
2379 case X86::VPBLENDDYrri:
2380 case X86::VPBLENDWYrri: {
2381 int8_t Mask;
2382 switch (Opc) {
2383 default:
2384 llvm_unreachable("Unreachable!");
2385 case X86::BLENDPDrri:
2386 Mask = (int8_t)0x03;
2387 break;
2388 case X86::BLENDPSrri:
2389 Mask = (int8_t)0x0F;
2390 break;
2391 case X86::PBLENDWrri:
2392 Mask = (int8_t)0xFF;
2393 break;
2394 case X86::VBLENDPDrri:
2395 Mask = (int8_t)0x03;
2396 break;
2397 case X86::VBLENDPSrri:
2398 Mask = (int8_t)0x0F;
2399 break;
2400 case X86::VBLENDPDYrri:
2401 Mask = (int8_t)0x0F;
2402 break;
2403 case X86::VBLENDPSYrri:
2404 Mask = (int8_t)0xFF;
2405 break;
2406 case X86::VPBLENDDrri:
2407 Mask = (int8_t)0x0F;
2408 break;
2409 case X86::VPBLENDWrri:
2410 Mask = (int8_t)0xFF;
2411 break;
2412 case X86::VPBLENDDYrri:
2413 Mask = (int8_t)0xFF;
2414 break;
2415 case X86::VPBLENDWYrri:
2416 Mask = (int8_t)0xFF;
2417 break;
2418 }
2419 // Only the least significant bits of Imm are used.
2420 // Using int8_t to ensure it will be sign extended to the int64_t that
2421 // setImm takes in order to match isel behavior.
2422 int8_t Imm = MI.getOperand(3).getImm() & Mask;
2423 WorkingMI = CloneIfNew(MI);
2424 WorkingMI->getOperand(3).setImm(Mask ^ Imm);
2425 break;
2426 }
2427 case X86::INSERTPSrr:
2428 case X86::VINSERTPSrr:
2429 case X86::VINSERTPSZrr: {
2430 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
2431 unsigned ZMask = Imm & 15;
2432 unsigned DstIdx = (Imm >> 4) & 3;
2433 unsigned SrcIdx = (Imm >> 6) & 3;
2434
2435 // We can commute insertps if we zero 2 of the elements, the insertion is
2436 // "inline" and we don't override the insertion with a zero.
2437 if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
2438 llvm::popcount(ZMask) == 2) {
2439 unsigned AltIdx = llvm::countr_zero((ZMask | (1 << DstIdx)) ^ 15);
2440 assert(AltIdx < 4 && "Illegal insertion index");
2441 unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
2442 WorkingMI = CloneIfNew(MI);
2443 WorkingMI->getOperand(MI.getNumOperands() - 1).setImm(AltImm);
2444 break;
2445 }
2446 return nullptr;
2447 }
2448 case X86::MOVSDrr:
2449 case X86::MOVSSrr:
2450 case X86::VMOVSDrr:
2451 case X86::VMOVSSrr: {
2452 // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
2453 if (Subtarget.hasSSE41()) {
2454 unsigned Mask;
2455 switch (Opc) {
2456 default:
2457 llvm_unreachable("Unreachable!");
2458 case X86::MOVSDrr:
2459 Opc = X86::BLENDPDrri;
2460 Mask = 0x02;
2461 break;
2462 case X86::MOVSSrr:
2463 Opc = X86::BLENDPSrri;
2464 Mask = 0x0E;
2465 break;
2466 case X86::VMOVSDrr:
2467 Opc = X86::VBLENDPDrri;
2468 Mask = 0x02;
2469 break;
2470 case X86::VMOVSSrr:
2471 Opc = X86::VBLENDPSrri;
2472 Mask = 0x0E;
2473 break;
2474 }
2475
2476 WorkingMI = CloneIfNew(MI);
2477 WorkingMI->setDesc(get(Opc));
2478 WorkingMI->addOperand(MachineOperand::CreateImm(Mask));
2479 break;
2480 }
2481
2482 WorkingMI = CloneIfNew(MI);
2483 WorkingMI->setDesc(get(X86::SHUFPDrri));
2484 WorkingMI->addOperand(MachineOperand::CreateImm(0x02));
2485 break;
2486 }
2487 case X86::SHUFPDrri: {
2488 // Commute to MOVSD.
2489 assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
2490 WorkingMI = CloneIfNew(MI);
2491 WorkingMI->setDesc(get(X86::MOVSDrr));
2492 WorkingMI->removeOperand(3);
2493 break;
2494 }
2495 case X86::PCLMULQDQrri:
2496 case X86::VPCLMULQDQrri:
2497 case X86::VPCLMULQDQYrri:
2498 case X86::VPCLMULQDQZrri:
2499 case X86::VPCLMULQDQZ128rri:
2500 case X86::VPCLMULQDQZ256rri: {
2501 // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
2502 // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
2503 unsigned Imm = MI.getOperand(3).getImm();
2504 unsigned Src1Hi = Imm & 0x01;
2505 unsigned Src2Hi = Imm & 0x10;
2506 WorkingMI = CloneIfNew(MI);
2507 WorkingMI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
2508 break;
2509 }
2510 case X86::VPCMPBZ128rri:
2511 case X86::VPCMPUBZ128rri:
2512 case X86::VPCMPBZ256rri:
2513 case X86::VPCMPUBZ256rri:
2514 case X86::VPCMPBZrri:
2515 case X86::VPCMPUBZrri:
2516 case X86::VPCMPDZ128rri:
2517 case X86::VPCMPUDZ128rri:
2518 case X86::VPCMPDZ256rri:
2519 case X86::VPCMPUDZ256rri:
2520 case X86::VPCMPDZrri:
2521 case X86::VPCMPUDZrri:
2522 case X86::VPCMPQZ128rri:
2523 case X86::VPCMPUQZ128rri:
2524 case X86::VPCMPQZ256rri:
2525 case X86::VPCMPUQZ256rri:
2526 case X86::VPCMPQZrri:
2527 case X86::VPCMPUQZrri:
2528 case X86::VPCMPWZ128rri:
2529 case X86::VPCMPUWZ128rri:
2530 case X86::VPCMPWZ256rri:
2531 case X86::VPCMPUWZ256rri:
2532 case X86::VPCMPWZrri:
2533 case X86::VPCMPUWZrri:
2534 case X86::VPCMPBZ128rrik:
2535 case X86::VPCMPUBZ128rrik:
2536 case X86::VPCMPBZ256rrik:
2537 case X86::VPCMPUBZ256rrik:
2538 case X86::VPCMPBZrrik:
2539 case X86::VPCMPUBZrrik:
2540 case X86::VPCMPDZ128rrik:
2541 case X86::VPCMPUDZ128rrik:
2542 case X86::VPCMPDZ256rrik:
2543 case X86::VPCMPUDZ256rrik:
2544 case X86::VPCMPDZrrik:
2545 case X86::VPCMPUDZrrik:
2546 case X86::VPCMPQZ128rrik:
2547 case X86::VPCMPUQZ128rrik:
2548 case X86::VPCMPQZ256rrik:
2549 case X86::VPCMPUQZ256rrik:
2550 case X86::VPCMPQZrrik:
2551 case X86::VPCMPUQZrrik:
2552 case X86::VPCMPWZ128rrik:
2553 case X86::VPCMPUWZ128rrik:
2554 case X86::VPCMPWZ256rrik:
2555 case X86::VPCMPUWZ256rrik:
2556 case X86::VPCMPWZrrik:
2557 case X86::VPCMPUWZrrik:
2558 WorkingMI = CloneIfNew(MI);
2559 // Flip comparison mode immediate (if necessary).
2560 WorkingMI->getOperand(MI.getNumOperands() - 1)
2561 .setImm(X86::getSwappedVPCMPImm(
2562 MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7));
2563 break;
2564 case X86::VPCOMBri:
2565 case X86::VPCOMUBri:
2566 case X86::VPCOMDri:
2567 case X86::VPCOMUDri:
2568 case X86::VPCOMQri:
2569 case X86::VPCOMUQri:
2570 case X86::VPCOMWri:
2571 case X86::VPCOMUWri:
2572 WorkingMI = CloneIfNew(MI);
2573 // Flip comparison mode immediate (if necessary).
2574 WorkingMI->getOperand(3).setImm(
2575 X86::getSwappedVPCOMImm(MI.getOperand(3).getImm() & 0x7));
2576 break;
2577 case X86::VCMPSDZrri:
2578 case X86::VCMPSSZrri:
2579 case X86::VCMPPDZrri:
2580 case X86::VCMPPSZrri:
2581 case X86::VCMPSHZrri:
2582 case X86::VCMPPHZrri:
2583 case X86::VCMPPHZ128rri:
2584 case X86::VCMPPHZ256rri:
2585 case X86::VCMPPDZ128rri:
2586 case X86::VCMPPSZ128rri:
2587 case X86::VCMPPDZ256rri:
2588 case X86::VCMPPSZ256rri:
2589 case X86::VCMPPDZrrik:
2590 case X86::VCMPPSZrrik:
2591 case X86::VCMPPDZ128rrik:
2592 case X86::VCMPPSZ128rrik:
2593 case X86::VCMPPDZ256rrik:
2594 case X86::VCMPPSZ256rrik:
2595 WorkingMI = CloneIfNew(MI);
2596 WorkingMI->getOperand(MI.getNumExplicitOperands() - 1)
2597 .setImm(X86::getSwappedVCMPImm(
2598 MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f));
2599 break;
2600 case X86::VPERM2F128rr:
2601 case X86::VPERM2I128rr:
2602 // Flip permute source immediate.
2603 // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
2604 // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
2605 WorkingMI = CloneIfNew(MI);
2606 WorkingMI->getOperand(3).setImm((MI.getOperand(3).getImm() & 0xFF) ^ 0x22);
2607 break;
2608 case X86::MOVHLPSrr:
2609 case X86::UNPCKHPDrr:
2610 case X86::VMOVHLPSrr:
2611 case X86::VUNPCKHPDrr:
2612 case X86::VMOVHLPSZrr:
2613 case X86::VUNPCKHPDZ128rr:
2614 assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");
2615
2616 switch (Opc) {
2617 default:
2618 llvm_unreachable("Unreachable!");
2619 case X86::MOVHLPSrr:
2620 Opc = X86::UNPCKHPDrr;
2621 break;
2622 case X86::UNPCKHPDrr:
2623 Opc = X86::MOVHLPSrr;
2624 break;
2625 case X86::VMOVHLPSrr:
2626 Opc = X86::VUNPCKHPDrr;
2627 break;
2628 case X86::VUNPCKHPDrr:
2629 Opc = X86::VMOVHLPSrr;
2630 break;
2631 case X86::VMOVHLPSZrr:
2632 Opc = X86::VUNPCKHPDZ128rr;
2633 break;
2634 case X86::VUNPCKHPDZ128rr:
2635 Opc = X86::VMOVHLPSZrr;
2636 break;
2637 }
2638 WorkingMI = CloneIfNew(MI);
2639 WorkingMI->setDesc(get(Opc));
2640 break;
2641 CASE_ND(CMOV16rr)
2642 CASE_ND(CMOV32rr)
2643 CASE_ND(CMOV64rr) {
2644 WorkingMI = CloneIfNew(MI);
2645 unsigned OpNo = MI.getDesc().getNumOperands() - 1;
2646 X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
2647 WorkingMI->getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
2648 break;
2649 }
2650 case X86::VPTERNLOGDZrri:
2651 case X86::VPTERNLOGDZrmi:
2652 case X86::VPTERNLOGDZ128rri:
2653 case X86::VPTERNLOGDZ128rmi:
2654 case X86::VPTERNLOGDZ256rri:
2655 case X86::VPTERNLOGDZ256rmi:
2656 case X86::VPTERNLOGQZrri:
2657 case X86::VPTERNLOGQZrmi:
2658 case X86::VPTERNLOGQZ128rri:
2659 case X86::VPTERNLOGQZ128rmi:
2660 case X86::VPTERNLOGQZ256rri:
2661 case X86::VPTERNLOGQZ256rmi:
2662 case X86::VPTERNLOGDZrrik:
2663 case X86::VPTERNLOGDZ128rrik:
2664 case X86::VPTERNLOGDZ256rrik:
2665 case X86::VPTERNLOGQZrrik:
2666 case X86::VPTERNLOGQZ128rrik:
2667 case X86::VPTERNLOGQZ256rrik:
2668 case X86::VPTERNLOGDZrrikz:
2669 case X86::VPTERNLOGDZrmikz:
2670 case X86::VPTERNLOGDZ128rrikz:
2671 case X86::VPTERNLOGDZ128rmikz:
2672 case X86::VPTERNLOGDZ256rrikz:
2673 case X86::VPTERNLOGDZ256rmikz:
2674 case X86::VPTERNLOGQZrrikz:
2675 case X86::VPTERNLOGQZrmikz:
2676 case X86::VPTERNLOGQZ128rrikz:
2677 case X86::VPTERNLOGQZ128rmikz:
2678 case X86::VPTERNLOGQZ256rrikz:
2679 case X86::VPTERNLOGQZ256rmikz:
2680 case X86::VPTERNLOGDZ128rmbi:
2681 case X86::VPTERNLOGDZ256rmbi:
2682 case X86::VPTERNLOGDZrmbi:
2683 case X86::VPTERNLOGQZ128rmbi:
2684 case X86::VPTERNLOGQZ256rmbi:
2685 case X86::VPTERNLOGQZrmbi:
2686 case X86::VPTERNLOGDZ128rmbikz:
2687 case X86::VPTERNLOGDZ256rmbikz:
2688 case X86::VPTERNLOGDZrmbikz:
2689 case X86::VPTERNLOGQZ128rmbikz:
2690 case X86::VPTERNLOGQZ256rmbikz:
2691 case X86::VPTERNLOGQZrmbikz: {
2692 WorkingMI = CloneIfNew(MI);
2693 commuteVPTERNLOG(*WorkingMI, OpIdx1, OpIdx2);
2694 break;
2695 }
2696 default:
2697 if (isCommutableVPERMV3Instruction(Opc)) {
2698 WorkingMI = CloneIfNew(MI);
2699 WorkingMI->setDesc(get(getCommutedVPERMV3Opcode(Opc)));
2700 break;
2701 }
2702
2703 if (auto *FMA3Group = getFMA3Group(Opc, MI.getDesc().TSFlags)) {
2704 WorkingMI = CloneIfNew(MI);
2705 WorkingMI->setDesc(
2706 get(getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group)));
2707 break;
2708 }
2709 }
2710 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2711}
2712
2713bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
2714 unsigned &SrcOpIdx1,
2715 unsigned &SrcOpIdx2,
2716 bool IsIntrinsic) const {
2717 uint64_t TSFlags = MI.getDesc().TSFlags;
2718
2719 unsigned FirstCommutableVecOp = 1;
2720 unsigned LastCommutableVecOp = 3;
2721 unsigned KMaskOp = -1U;
2722 if (X86II::isKMasked(TSFlags)) {
2723 // For k-zero-masked operations it is OK to commute the first vector
2724 // operand, unless this is an intrinsic instruction.
2725 // For regular k-masked operations we make the conservative choice not to
2726 // commute the first vector operand, because the elements of that operand
2727 // for which the corresponding bit in the k-mask operand is set to 0 are
2728 // copied unchanged to the result of the instruction.
2729 // TODO/FIXME: The commute still may be legal if it is known that the
2730 // k-mask operand is set to either all ones or all zeroes.
2731 // It is also Ok to commute the 1st operand if all users of MI use only
2732 // the elements enabled by the k-mask operand. For example,
2733 // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
2734 // : v1[i];
2735 // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
2736 // // Ok, to commute v1 in FMADD213PSZrk.
2737
2738 // The k-mask operand has index = 2 for masked and zero-masked operations.
2739 KMaskOp = 2;
2740
2741 // The operand with index = 1 is used as a source for those elements for
2742 // which the corresponding bit in the k-mask is set to 0.
2743 if (X86II::isKMergeMasked(TSFlags) || IsIntrinsic)
2744 FirstCommutableVecOp = 3;
2745
2746 LastCommutableVecOp++;
2747 } else if (IsIntrinsic) {
2748 // Commuting the first operand of an intrinsic instruction isn't possible
2749 // unless we can prove that only the lowest element of the result is used.
2750 FirstCommutableVecOp = 2;
2751 }
2752
2753 if (isMem(MI, LastCommutableVecOp))
2754 LastCommutableVecOp--;
2755
2756 // Only operands in [FirstCommutableVecOp, LastCommutableVecOp] are commutable.
2757 // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
2758 // that the operand is not specified/fixed.
2759 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2760 (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
2761 SrcOpIdx1 == KMaskOp))
2762 return false;
2763 if (SrcOpIdx2 != CommuteAnyOperandIndex &&
2764 (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
2765 SrcOpIdx2 == KMaskOp))
2766 return false;
2767
2768 // Look for two different register operands assumed to be commutable
2769 // regardless of the FMA opcode. The FMA opcode is adjusted later.
2770 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2771 SrcOpIdx2 == CommuteAnyOperandIndex) {
2772 unsigned CommutableOpIdx2 = SrcOpIdx2;
2773
2774 // At least one of the operands to be commuted is not specified, so this
2775 // method is free to choose appropriate commutable operands.
2776 if (SrcOpIdx1 == SrcOpIdx2)
2777 // Neither operand is fixed. By default, set one of the commutable
2778 // operands to the last register operand of the instruction.
2779 CommutableOpIdx2 = LastCommutableVecOp;
2780 else if (SrcOpIdx2 == CommuteAnyOperandIndex)
2781 // Only one of the operands is not fixed.
2782 CommutableOpIdx2 = SrcOpIdx1;
2783
2784 // CommutableOpIdx2 is well defined now. Let's choose another commutable
2785 // operand and assign its index to CommutableOpIdx1.
2786 Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
2787
2788 unsigned CommutableOpIdx1;
2789 for (CommutableOpIdx1 = LastCommutableVecOp;
2790 CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
2791 // Just ignore and skip the k-mask operand.
2792 if (CommutableOpIdx1 == KMaskOp)
2793 continue;
2794
2795 // The commuted operands must have different registers.
2796 // Otherwise, the commute transformation does not change anything and
2797 // is therefore useless.
2798 if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
2799 break;
2800 }
2801
2802 // No appropriate commutable operands were found.
2803 if (CommutableOpIdx1 < FirstCommutableVecOp)
2804 return false;
2805
2806 // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2
2807 // to return those values.
2808 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2809 CommutableOpIdx2))
2810 return false;
2811 }
2812
2813 return true;
2814}
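// Worked example: for a merge-masked FMA such as VFMADD213PSZrk the operand
// order is (dst, src1/tied, k-mask, src2, src3). KMaskOp is therefore 2,
// FirstCommutableVecOp becomes 3 and LastCommutableVecOp becomes 4, i.e. only
// src2 and src3 may be exchanged; src1 supplies the masked-off lanes of the
// result and is left alone.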
2815
2816bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2817 unsigned &SrcOpIdx1,
2818 unsigned &SrcOpIdx2) const {
2819 const MCInstrDesc &Desc = MI.getDesc();
2820 if (!Desc.isCommutable())
2821 return false;
2822
2823 switch (MI.getOpcode()) {
2824 case X86::CMPSDrri:
2825 case X86::CMPSSrri:
2826 case X86::CMPPDrri:
2827 case X86::CMPPSrri:
2828 case X86::VCMPSDrri:
2829 case X86::VCMPSSrri:
2830 case X86::VCMPPDrri:
2831 case X86::VCMPPSrri:
2832 case X86::VCMPPDYrri:
2833 case X86::VCMPPSYrri:
2834 case X86::VCMPSDZrri:
2835 case X86::VCMPSSZrri:
2836 case X86::VCMPPDZrri:
2837 case X86::VCMPPSZrri:
2838 case X86::VCMPSHZrri:
2839 case X86::VCMPPHZrri:
2840 case X86::VCMPPHZ128rri:
2841 case X86::VCMPPHZ256rri:
2842 case X86::VCMPPDZ128rri:
2843 case X86::VCMPPSZ128rri:
2844 case X86::VCMPPDZ256rri:
2845 case X86::VCMPPSZ256rri:
2846 case X86::VCMPPDZrrik:
2847 case X86::VCMPPSZrrik:
2848 case X86::VCMPPDZ128rrik:
2849 case X86::VCMPPSZ128rrik:
2850 case X86::VCMPPDZ256rrik:
2851 case X86::VCMPPSZ256rrik: {
2852 unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;
2853
2854 // Float comparison can be safely commuted for
2855 // Ordered/Unordered/Equal/NotEqual tests
2856 unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
2857 switch (Imm) {
2858 default:
2859 // EVEX versions can be commuted.
2860 if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
2861 break;
2862 return false;
2863 case 0x00: // EQUAL
2864 case 0x03: // UNORDERED
2865 case 0x04: // NOT EQUAL
2866 case 0x07: // ORDERED
2867 break;
2868 }
2869
2870 // The indices of the commutable operands are 1 and 2 (or 2 and 3
2871 // when masked).
2872 // Assign them to the returned operand indices here.
2873 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
2874 2 + OpOffset);
2875 }
2876 case X86::MOVSSrr:
2877 // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
2878 // form an SSE4.1 blend. We assume VMOVSSrr/VMOVSDrr are always commutable
2879 // since AVX implies SSE4.1.
2880 if (Subtarget.hasSSE41())
2881 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2882 return false;
2883 case X86::SHUFPDrri:
2884 // We can commute this to MOVSD.
2885 if (MI.getOperand(3).getImm() == 0x02)
2886 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2887 return false;
2888 case X86::MOVHLPSrr:
2889 case X86::UNPCKHPDrr:
2890 case X86::VMOVHLPSrr:
2891 case X86::VUNPCKHPDrr:
2892 case X86::VMOVHLPSZrr:
2893 case X86::VUNPCKHPDZ128rr:
2894 if (Subtarget.hasSSE2())
2895 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2896 return false;
2897 case X86::VPTERNLOGDZrri:
2898 case X86::VPTERNLOGDZrmi:
2899 case X86::VPTERNLOGDZ128rri:
2900 case X86::VPTERNLOGDZ128rmi:
2901 case X86::VPTERNLOGDZ256rri:
2902 case X86::VPTERNLOGDZ256rmi:
2903 case X86::VPTERNLOGQZrri:
2904 case X86::VPTERNLOGQZrmi:
2905 case X86::VPTERNLOGQZ128rri:
2906 case X86::VPTERNLOGQZ128rmi:
2907 case X86::VPTERNLOGQZ256rri:
2908 case X86::VPTERNLOGQZ256rmi:
2909 case X86::VPTERNLOGDZrrik:
2910 case X86::VPTERNLOGDZ128rrik:
2911 case X86::VPTERNLOGDZ256rrik:
2912 case X86::VPTERNLOGQZrrik:
2913 case X86::VPTERNLOGQZ128rrik:
2914 case X86::VPTERNLOGQZ256rrik:
2915 case X86::VPTERNLOGDZrrikz:
2916 case X86::VPTERNLOGDZrmikz:
2917 case X86::VPTERNLOGDZ128rrikz:
2918 case X86::VPTERNLOGDZ128rmikz:
2919 case X86::VPTERNLOGDZ256rrikz:
2920 case X86::VPTERNLOGDZ256rmikz:
2921 case X86::VPTERNLOGQZrrikz:
2922 case X86::VPTERNLOGQZrmikz:
2923 case X86::VPTERNLOGQZ128rrikz:
2924 case X86::VPTERNLOGQZ128rmikz:
2925 case X86::VPTERNLOGQZ256rrikz:
2926 case X86::VPTERNLOGQZ256rmikz:
2927 case X86::VPTERNLOGDZ128rmbi:
2928 case X86::VPTERNLOGDZ256rmbi:
2929 case X86::VPTERNLOGDZrmbi:
2930 case X86::VPTERNLOGQZ128rmbi:
2931 case X86::VPTERNLOGQZ256rmbi:
2932 case X86::VPTERNLOGQZrmbi:
2933 case X86::VPTERNLOGDZ128rmbikz:
2934 case X86::VPTERNLOGDZ256rmbikz:
2935 case X86::VPTERNLOGDZrmbikz:
2936 case X86::VPTERNLOGQZ128rmbikz:
2937 case X86::VPTERNLOGQZ256rmbikz:
2938 case X86::VPTERNLOGQZrmbikz:
2939 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2940 case X86::VPDPWSSDYrr:
2941 case X86::VPDPWSSDrr:
2942 case X86::VPDPWSSDSYrr:
2943 case X86::VPDPWSSDSrr:
2944 case X86::VPDPWUUDrr:
2945 case X86::VPDPWUUDYrr:
2946 case X86::VPDPWUUDSrr:
2947 case X86::VPDPWUUDSYrr:
2948 case X86::VPDPBSSDSrr:
2949 case X86::VPDPBSSDSYrr:
2950 case X86::VPDPBSSDrr:
2951 case X86::VPDPBSSDYrr:
2952 case X86::VPDPBUUDSrr:
2953 case X86::VPDPBUUDSYrr:
2954 case X86::VPDPBUUDrr:
2955 case X86::VPDPBUUDYrr:
2956 case X86::VPDPBSSDSZ128r:
2957 case X86::VPDPBSSDSZ128rk:
2958 case X86::VPDPBSSDSZ128rkz:
2959 case X86::VPDPBSSDSZ256r:
2960 case X86::VPDPBSSDSZ256rk:
2961 case X86::VPDPBSSDSZ256rkz:
2962 case X86::VPDPBSSDSZr:
2963 case X86::VPDPBSSDSZrk:
2964 case X86::VPDPBSSDSZrkz:
2965 case X86::VPDPBSSDZ128r:
2966 case X86::VPDPBSSDZ128rk:
2967 case X86::VPDPBSSDZ128rkz:
2968 case X86::VPDPBSSDZ256r:
2969 case X86::VPDPBSSDZ256rk:
2970 case X86::VPDPBSSDZ256rkz:
2971 case X86::VPDPBSSDZr:
2972 case X86::VPDPBSSDZrk:
2973 case X86::VPDPBSSDZrkz:
2974 case X86::VPDPBUUDSZ128r:
2975 case X86::VPDPBUUDSZ128rk:
2976 case X86::VPDPBUUDSZ128rkz:
2977 case X86::VPDPBUUDSZ256r:
2978 case X86::VPDPBUUDSZ256rk:
2979 case X86::VPDPBUUDSZ256rkz:
2980 case X86::VPDPBUUDSZr:
2981 case X86::VPDPBUUDSZrk:
2982 case X86::VPDPBUUDSZrkz:
2983 case X86::VPDPBUUDZ128r:
2984 case X86::VPDPBUUDZ128rk:
2985 case X86::VPDPBUUDZ128rkz:
2986 case X86::VPDPBUUDZ256r:
2987 case X86::VPDPBUUDZ256rk:
2988 case X86::VPDPBUUDZ256rkz:
2989 case X86::VPDPBUUDZr:
2990 case X86::VPDPBUUDZrk:
2991 case X86::VPDPBUUDZrkz:
2992 case X86::VPDPWSSDZ128r:
2993 case X86::VPDPWSSDZ128rk:
2994 case X86::VPDPWSSDZ128rkz:
2995 case X86::VPDPWSSDZ256r:
2996 case X86::VPDPWSSDZ256rk:
2997 case X86::VPDPWSSDZ256rkz:
2998 case X86::VPDPWSSDZr:
2999 case X86::VPDPWSSDZrk:
3000 case X86::VPDPWSSDZrkz:
3001 case X86::VPDPWSSDSZ128r:
3002 case X86::VPDPWSSDSZ128rk:
3003 case X86::VPDPWSSDSZ128rkz:
3004 case X86::VPDPWSSDSZ256r:
3005 case X86::VPDPWSSDSZ256rk:
3006 case X86::VPDPWSSDSZ256rkz:
3007 case X86::VPDPWSSDSZr:
3008 case X86::VPDPWSSDSZrk:
3009 case X86::VPDPWSSDSZrkz:
3010 case X86::VPDPWUUDZ128r:
3011 case X86::VPDPWUUDZ128rk:
3012 case X86::VPDPWUUDZ128rkz:
3013 case X86::VPDPWUUDZ256r:
3014 case X86::VPDPWUUDZ256rk:
3015 case X86::VPDPWUUDZ256rkz:
3016 case X86::VPDPWUUDZr:
3017 case X86::VPDPWUUDZrk:
3018 case X86::VPDPWUUDZrkz:
3019 case X86::VPDPWUUDSZ128r:
3020 case X86::VPDPWUUDSZ128rk:
3021 case X86::VPDPWUUDSZ128rkz:
3022 case X86::VPDPWUUDSZ256r:
3023 case X86::VPDPWUUDSZ256rk:
3024 case X86::VPDPWUUDSZ256rkz:
3025 case X86::VPDPWUUDSZr:
3026 case X86::VPDPWUUDSZrk:
3027 case X86::VPDPWUUDSZrkz:
3028 case X86::VPMADD52HUQrr:
3029 case X86::VPMADD52HUQYrr:
3030 case X86::VPMADD52HUQZ128r:
3031 case X86::VPMADD52HUQZ128rk:
3032 case X86::VPMADD52HUQZ128rkz:
3033 case X86::VPMADD52HUQZ256r:
3034 case X86::VPMADD52HUQZ256rk:
3035 case X86::VPMADD52HUQZ256rkz:
3036 case X86::VPMADD52HUQZr:
3037 case X86::VPMADD52HUQZrk:
3038 case X86::VPMADD52HUQZrkz:
3039 case X86::VPMADD52LUQrr:
3040 case X86::VPMADD52LUQYrr:
3041 case X86::VPMADD52LUQZ128r:
3042 case X86::VPMADD52LUQZ128rk:
3043 case X86::VPMADD52LUQZ128rkz:
3044 case X86::VPMADD52LUQZ256r:
3045 case X86::VPMADD52LUQZ256rk:
3046 case X86::VPMADD52LUQZ256rkz:
3047 case X86::VPMADD52LUQZr:
3048 case X86::VPMADD52LUQZrk:
3049 case X86::VPMADD52LUQZrkz:
3050 case X86::VFMADDCPHZr:
3051 case X86::VFMADDCPHZrk:
3052 case X86::VFMADDCPHZrkz:
3053 case X86::VFMADDCPHZ128r:
3054 case X86::VFMADDCPHZ128rk:
3055 case X86::VFMADDCPHZ128rkz:
3056 case X86::VFMADDCPHZ256r:
3057 case X86::VFMADDCPHZ256rk:
3058 case X86::VFMADDCPHZ256rkz:
3059 case X86::VFMADDCSHZr:
3060 case X86::VFMADDCSHZrk:
3061 case X86::VFMADDCSHZrkz: {
3062 unsigned CommutableOpIdx1 = 2;
3063 unsigned CommutableOpIdx2 = 3;
3064 if (X86II::isKMasked(Desc.TSFlags)) {
3065 // Skip the mask register.
3066 ++CommutableOpIdx1;
3067 ++CommutableOpIdx2;
3068 }
3069 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3070 CommutableOpIdx2))
3071 return false;
3072 if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
3073 // No idea.
3074 return false;
3075 return true;
3076 }
3077
3078 default:
3079 const X86InstrFMA3Group *FMA3Group =
3080 getFMA3Group(MI.getOpcode(), MI.getDesc().TSFlags);
3081 if (FMA3Group)
3082 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
3083 FMA3Group->isIntrinsic());
3084
3085 // Handle masked instructions since we need to skip over the mask input
3086 // and the preserved input.
3087 if (X86II::isKMasked(Desc.TSFlags)) {
3088 // First assume that the first input is the mask operand and skip past it.
3089 unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
3090 unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
3091 // Check if the first input is tied. If it is not, then we only need to
3092 // skip the mask operand, which we did above.
3093 if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
3094 MCOI::TIED_TO) != -1)) {
3095 // If this is zero masking instruction with a tied operand, we need to
3096 // move the first index back to the first input since this must
3097 // be a 3 input instruction and we want the first two non-mask inputs.
3098 // Otherwise this is a 2 input instruction with a preserved input and
3099 // mask, so we need to move the indices to skip one more input.
3100 if (X86II::isKMergeMasked(Desc.TSFlags)) {
3101 ++CommutableOpIdx1;
3102 ++CommutableOpIdx2;
3103 } else {
3104 --CommutableOpIdx1;
3105 }
3106 }
3107
3108 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3109 CommutableOpIdx2))
3110 return false;
3111
3112 if (!MI.getOperand(SrcOpIdx1).isReg() ||
3113 !MI.getOperand(SrcOpIdx2).isReg())
3114 // No idea.
3115 return false;
3116 return true;
3117 }
3118
3119 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
3120 }
3121 return false;
3122}
3123
3124static bool isConvertibleLEA(MachineInstr *MI) {
3125 unsigned Opcode = MI->getOpcode();
3126 if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
3127 Opcode != X86::LEA64_32r)
3128 return false;
3129
3130 const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
3131 const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
3132 const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
3133
3134 if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
3135 Scale.getImm() > 1)
3136 return false;
3137
3138 return true;
3139}
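// For illustration: a "convertible" LEA here is one that is just a register
// add, e.g. lea r3, [r1 + r2] (scale 1, zero displacement, no segment); an LEA
// such as lea r3, [r1 + 4*r2 + 8] is rejected by the checks above.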
3140
3141bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
3142 // Currently we're only interested in the following sequence:
3143 // r3 = lea r1, r2
3144 // r5 = add r3, r4
3145 // Both r3 and r4 are killed in the add; we want the add instruction to have
3146 // the operand order
3147 // r5 = add r4, r3
3148 // so that later in X86FixupLEAs the lea instruction can be rewritten as add.
3149 unsigned Opcode = MI.getOpcode();
3150 if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
3151 return false;
3152
3153 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3154 Register Reg1 = MI.getOperand(1).getReg();
3155 Register Reg2 = MI.getOperand(2).getReg();
3156
3157 // Check if Reg1 comes from LEA in the same MBB.
3158 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) {
3159 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
3160 Commute = true;
3161 return true;
3162 }
3163 }
3164
3165 // Check if Reg2 comes from LEA in the same MBB.
3166 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) {
3167 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
3168 Commute = false;
3169 return true;
3170 }
3171 }
3172
3173 return false;
3174}
3175
3176int X86::getCondSrcNoFromDesc(const MCInstrDesc &MCID) {
3177 unsigned Opcode = MCID.getOpcode();
3178 if (!(X86::isJCC(Opcode) || X86::isSETCC(Opcode) || X86::isSETZUCC(Opcode) ||
3179 X86::isCMOVCC(Opcode) || X86::isCFCMOVCC(Opcode) ||
3180 X86::isCCMPCC(Opcode) || X86::isCTESTCC(Opcode)))
3181 return -1;
3182 // Assume that condition code is always the last use operand.
3183 unsigned NumUses = MCID.getNumOperands() - MCID.getNumDefs();
3184 return NumUses - 1;
3185}
3186
3187X86::CondCode X86::getCondFromMI(const MachineInstr &MI) {
3188 const MCInstrDesc &MCID = MI.getDesc();
3189 int CondNo = getCondSrcNoFromDesc(MCID);
3190 if (CondNo < 0)
3191 return X86::COND_INVALID;
3192 CondNo += MCID.getNumDefs();
3193 return static_cast<X86::CondCode>(MI.getOperand(CondNo).getImm());
3194}
3195
3196X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
3197 return X86::isJCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
3198 : X86::COND_INVALID;
3199}
3200
3201X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
3202 return X86::isSETCC(MI.getOpcode()) || X86::isSETZUCC(MI.getOpcode())
3203 ? X86::getCondFromMI(MI)
3204 : X86::COND_INVALID;
3205}
3206
3207X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
3208 return X86::isCMOVCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
3209 : X86::COND_INVALID;
3210}
3211
3212X86::CondCode X86::getCondFromCFCMov(const MachineInstr &MI) {
3213 return X86::isCFCMOVCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
3214 : X86::COND_INVALID;
3215}
3216
3217X86::CondCode X86::getCondFromCCMP(const MachineInstr &MI) {
3218 return X86::isCCMPCC(MI.getOpcode()) || X86::isCTESTCC(MI.getOpcode())
3219 ? X86::getCondFromMI(MI)
3220 : X86::COND_INVALID;
3221}
3222
3223int X86::getCCMPCondFlagsFromCondCode(X86::CondCode CC) {
3224 // CCMP/CTEST has two conditional operands:
3225 // - SCC: source conditional code (same as CMOV)
3226 // - DCF: destination conditional flags, which has 4 valid bits
3227 //
3228 // +----+----+----+----+
3229 // | OF | SF | ZF | CF |
3230 // +----+----+----+----+
3231 //
3232 // If SCC (the source conditional code) evaluates to false, CCMP/CTEST
3233 // updates the conditional flags as follows:
3234 //
3235 // OF = DCF.OF
3236 // SF = DCF.SF
3237 // ZF = DCF.ZF
3238 // CF = DCF.CF
3239 // PF = DCF.CF
3240 // AF = 0 (Auxiliary Carry Flag)
3241 //
3242 // Otherwise, the CMP or TEST is executed and it updates the
3243 // CSPAZO flags normally.
3244 //
3245 // NOTE:
3246 // If SCC = P, then SCC evaluates to true regardless of the CSPAZO value.
3247 // If SCC = NP, then SCC evaluates to false regardless of the CSPAZO value.
3248
3249 enum { CF = 1, ZF = 2, SF = 4, OF = 8, PF = CF };
3250
3251 switch (CC) {
3252 default:
3253 llvm_unreachable("Illegal condition code!");
3254 case X86::COND_NO:
3255 case X86::COND_NE:
3256 case X86::COND_GE:
3257 case X86::COND_G:
3258 case X86::COND_AE:
3259 case X86::COND_A:
3260 case X86::COND_NS:
3261 case X86::COND_NP:
3262 return 0;
3263 case X86::COND_O:
3264 return OF;
3265 case X86::COND_B:
3266 case X86::COND_BE:
3267 return CF;
3268 break;
3269 case X86::COND_E:
3270 case X86::COND_LE:
3271 return ZF;
3272 case X86::COND_S:
3273 case X86::COND_L:
3274 return SF;
3275 case X86::COND_P:
3276 return PF;
3277 }
3278}
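// Worked example: getCCMPCondFlagsFromCondCode(X86::COND_E) returns ZF
// (0b0010), i.e. a DCF value with only ZF set is exactly the flag state under
// which a subsequent "equal" condition evaluates to true.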
3279
3280#define GET_X86_NF_TRANSFORM_TABLE
3281#define GET_X86_ND2NONND_TABLE
3282#include "X86GenInstrMapping.inc"
3283
3284static unsigned getNewOpcFromTable(ArrayRef<X86TableEntry> Table,
3285 unsigned Opc) {
3286 const auto I = llvm::lower_bound(Table, Opc);
3287 return (I == Table.end() || I->OldOpc != Opc) ? 0U : I->NewOpc;
3288}
3289unsigned X86::getNFVariant(unsigned Opc) {
3290 return getNewOpcFromTable(X86NFTransformTable, Opc);
3291}
3292
3293unsigned X86::getNonNDVariant(unsigned Opc) {
3294 return getNewOpcFromTable(X86ND2NonNDTable, Opc);
3295}
3296
3297/// Return the inverse of the specified condition,
3298/// e.g. turning COND_E to COND_NE.
3299X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
3300 switch (CC) {
3301 default:
3302 llvm_unreachable("Illegal condition code!");
3303 case X86::COND_E:
3304 return X86::COND_NE;
3305 case X86::COND_NE:
3306 return X86::COND_E;
3307 case X86::COND_L:
3308 return X86::COND_GE;
3309 case X86::COND_LE:
3310 return X86::COND_G;
3311 case X86::COND_G:
3312 return X86::COND_LE;
3313 case X86::COND_GE:
3314 return X86::COND_L;
3315 case X86::COND_B:
3316 return X86::COND_AE;
3317 case X86::COND_BE:
3318 return X86::COND_A;
3319 case X86::COND_A:
3320 return X86::COND_BE;
3321 case X86::COND_AE:
3322 return X86::COND_B;
3323 case X86::COND_S:
3324 return X86::COND_NS;
3325 case X86::COND_NS:
3326 return X86::COND_S;
3327 case X86::COND_P:
3328 return X86::COND_NP;
3329 case X86::COND_NP:
3330 return X86::COND_P;
3331 case X86::COND_O:
3332 return X86::COND_NO;
3333 case X86::COND_NO:
3334 return X86::COND_O;
3335 case X86::COND_NE_OR_P:
3336 return X86::COND_E_AND_NP;
3337 case X86::COND_E_AND_NP:
3338 return X86::COND_NE_OR_P;
3339 }
3340}
3341
3342/// Assuming the flags are set by MI(a,b), return the condition code if we
3343/// modify the instructions such that flags are set by MI(b,a).
3344static X86::CondCode getSwappedCondition(X86::CondCode CC) {
3345 switch (CC) {
3346 default:
3347 return X86::COND_INVALID;
3348 case X86::COND_E:
3349 return X86::COND_E;
3350 case X86::COND_NE:
3351 return X86::COND_NE;
3352 case X86::COND_L:
3353 return X86::COND_G;
3354 case X86::COND_LE:
3355 return X86::COND_GE;
3356 case X86::COND_G:
3357 return X86::COND_L;
3358 case X86::COND_GE:
3359 return X86::COND_LE;
3360 case X86::COND_B:
3361 return X86::COND_A;
3362 case X86::COND_BE:
3363 return X86::COND_AE;
3364 case X86::COND_A:
3365 return X86::COND_B;
3366 case X86::COND_AE:
3367 return X86::COND_BE;
3368 }
3369}
3370
3371std::pair<X86::CondCode, bool>
3372X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
3373 X86::CondCode CC = X86::COND_INVALID;
3374 bool NeedSwap = false;
3375 switch (Predicate) {
3376 default:
3377 break;
3378 // Floating-point Predicates
3379 case CmpInst::FCMP_UEQ:
3380 CC = X86::COND_E;
3381 break;
3382 case CmpInst::FCMP_OLT:
3383 NeedSwap = true;
3384 [[fallthrough]];
3385 case CmpInst::FCMP_OGT:
3386 CC = X86::COND_A;
3387 break;
3388 case CmpInst::FCMP_OLE:
3389 NeedSwap = true;
3390 [[fallthrough]];
3391 case CmpInst::FCMP_OGE:
3392 CC = X86::COND_AE;
3393 break;
3394 case CmpInst::FCMP_UGT:
3395 NeedSwap = true;
3396 [[fallthrough]];
3397 case CmpInst::FCMP_ULT:
3398 CC = X86::COND_B;
3399 break;
3400 case CmpInst::FCMP_UGE:
3401 NeedSwap = true;
3402 [[fallthrough]];
3403 case CmpInst::FCMP_ULE:
3404 CC = X86::COND_BE;
3405 break;
3406 case CmpInst::FCMP_ONE:
3407 CC = X86::COND_NE;
3408 break;
3409 case CmpInst::FCMP_UNO:
3410 CC = X86::COND_P;
3411 break;
3412 case CmpInst::FCMP_ORD:
3413 CC = X86::COND_NP;
3414 break;
3415 case CmpInst::FCMP_OEQ:
3416 [[fallthrough]];
3417 case CmpInst::FCMP_UNE:
3419 break;
3420
3421 // Integer Predicates
3422 case CmpInst::ICMP_EQ:
3423 CC = X86::COND_E;
3424 break;
3425 case CmpInst::ICMP_NE:
3426 CC = X86::COND_NE;
3427 break;
3428 case CmpInst::ICMP_UGT:
3429 CC = X86::COND_A;
3430 break;
3431 case CmpInst::ICMP_UGE:
3432 CC = X86::COND_AE;
3433 break;
3434 case CmpInst::ICMP_ULT:
3435 CC = X86::COND_B;
3436 break;
3437 case CmpInst::ICMP_ULE:
3438 CC = X86::COND_BE;
3439 break;
3440 case CmpInst::ICMP_SGT:
3441 CC = X86::COND_G;
3442 break;
3443 case CmpInst::ICMP_SGE:
3444 CC = X86::COND_GE;
3445 break;
3446 case CmpInst::ICMP_SLT:
3447 CC = X86::COND_L;
3448 break;
3449 case CmpInst::ICMP_SLE:
3450 CC = X86::COND_LE;
3451 break;
3452 }
3453
3454 return std::make_pair(CC, NeedSwap);
3455}
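// Worked example: FCMP_OLT yields {COND_A, NeedSwap = true}; "a < b (ordered)"
// is emitted as a compare with the operands swapped followed by an "above"
// test, which is also false for unordered inputs because ucomiss/comiss set CF
// (and ZF) on an unordered result.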
3456
3457/// Return a cmov opcode for the given register size in bytes, and operand type.
3458unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand,
3459 bool HasNDD) {
3460 switch (RegBytes) {
3461 default:
3462 llvm_unreachable("Illegal register size!");
3463#define GET_ND_IF_ENABLED(OPC) (HasNDD ? OPC##_ND : OPC)
3464 case 2:
3465 return HasMemoryOperand ? GET_ND_IF_ENABLED(X86::CMOV16rm)
3466 : GET_ND_IF_ENABLED(X86::CMOV16rr);
3467 case 4:
3468 return HasMemoryOperand ? GET_ND_IF_ENABLED(X86::CMOV32rm)
3469 : GET_ND_IF_ENABLED(X86::CMOV32rr);
3470 case 8:
3471 return HasMemoryOperand ? GET_ND_IF_ENABLED(X86::CMOV64rm)
3472 : GET_ND_IF_ENABLED(X86::CMOV64rr);
3473 }
3474}
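// Usage sketch: getCMovOpcode(4, /*HasMemoryOperand=*/false, /*HasNDD=*/false)
// returns X86::CMOV32rr, while the same query with HasNDD == true returns the
// new-data-destination form X86::CMOV32rr_ND.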
3475
3476/// Get the VPCMP immediate for the given condition.
3478 switch (CC) {
3479 default:
3480 llvm_unreachable("Unexpected SETCC condition");
3481 case ISD::SETNE:
3482 return 4;
3483 case ISD::SETEQ:
3484 return 0;
3485 case ISD::SETULT:
3486 case ISD::SETLT:
3487 return 1;
3488 case ISD::SETUGT:
3489 case ISD::SETGT:
3490 return 6;
3491 case ISD::SETUGE:
3492 case ISD::SETGE:
3493 return 5;
3494 case ISD::SETULE:
3495 case ISD::SETLE:
3496 return 2;
3497 }
3498}
3499
3500/// Get the VPCMP immediate if the operands are swapped.
3501unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
3502 switch (Imm) {
3503 default:
3504 llvm_unreachable("Unreachable!");
3505 case 0x01:
3506 Imm = 0x06;
3507 break; // LT -> NLE
3508 case 0x02:
3509 Imm = 0x05;
3510 break; // LE -> NLT
3511 case 0x05:
3512 Imm = 0x02;
3513 break; // NLT -> LE
3514 case 0x06:
3515 Imm = 0x01;
3516 break; // NLE -> LT
3517 case 0x00: // EQ
3518 case 0x03: // FALSE
3519 case 0x04: // NE
3520 case 0x07: // TRUE
3521 break;
3522 }
3523
3524 return Imm;
3525}
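// Worked example: getSwappedVPCMPImm(0x01 /*LT*/) returns 0x06 /*NLE*/, since
// "a < b" on swapped operands must be tested as "b > a"; EQ, NE, FALSE and
// TRUE are symmetric and map to themselves.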
3526
3527/// Get the VPCOM immediate if the operands are swapped.
3528unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
3529 switch (Imm) {
3530 default:
3531 llvm_unreachable("Unreachable!");
3532 case 0x00:
3533 Imm = 0x02;
3534 break; // LT -> GT
3535 case 0x01:
3536 Imm = 0x03;
3537 break; // LE -> GE
3538 case 0x02:
3539 Imm = 0x00;
3540 break; // GT -> LT
3541 case 0x03:
3542 Imm = 0x01;
3543 break; // GE -> LE
3544 case 0x04: // EQ
3545 case 0x05: // NE
3546 case 0x06: // FALSE
3547 case 0x07: // TRUE
3548 break;
3549 }
3550
3551 return Imm;
3552}
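// Worked example: getSwappedVPCOMImm(0x00 /*LT*/) returns 0x02 /*GT*/ and vice
// versa, while EQ/NE/FALSE/TRUE (0x04-0x07) are left unchanged, mirroring the
// VPCMP case above.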
3553
3554/// Get the VCMP immediate if the operands are swapped.
3555unsigned X86::getSwappedVCMPImm(unsigned Imm) {
3556 // Only need the lower 2 bits to distinguish.
3557 switch (Imm & 0x3) {
3558 default:
3559 llvm_unreachable("Unreachable!");
3560 case 0x00:
3561 case 0x03:
3562 // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
3563 break;
3564 case 0x01:
3565 case 0x02:
3566 // Need to toggle bits 3:0. Bit 4 stays the same.
3567 Imm ^= 0xf;
3568 break;
3569 }
3570
3571 return Imm;
3572}
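// Worked example: getSwappedVCMPImm(0x01 /*LT*/) returns 0x0E /*GT*/; only the
// low two bits distinguish the asymmetric predicates, and toggling bits 3:0
// turns LT/LE into GT/GE while leaving the signalling/ordering bit 4 untouched.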
3573
3575 if (Info.RegClass == X86::VR128RegClassID ||
3576 Info.RegClass == X86::VR128XRegClassID)
3577 return 128;
3578 if (Info.RegClass == X86::VR256RegClassID ||
3579 Info.RegClass == X86::VR256XRegClassID)
3580 return 256;
3581 if (Info.RegClass == X86::VR512RegClassID)
3582 return 512;
3583 llvm_unreachable("Unknown register class!");
3584}
3585
3586/// Return true if Reg is an X87 register.
3587static bool isX87Reg(unsigned Reg) {
3588 return (Reg == X86::FPCW || Reg == X86::FPSW ||
3589 (Reg >= X86::ST0 && Reg <= X86::ST7));
3590}
3591
3592/// Check if the instruction is an X87 instruction.
3593bool X86::isX87Instruction(MachineInstr &MI) {
3594 // Calls and inline asm def X87 registers, so we special-case them here;
3595 // otherwise calls would incorrectly be flagged as X87 instructions as a
3596 // result.
3597 if (MI.isCall() || MI.isInlineAsm())
3598 return false;
3599 for (const MachineOperand &MO : MI.operands()) {
3600 if (!MO.isReg())
3601 continue;
3602 if (isX87Reg(MO.getReg()))
3603 return true;
3604 }
3605 return false;
3606}
3607
3608int X86::getFirstAddrOperandIdx(const MachineInstr &MI) {
3609 auto IsMemOp = [](const MCOperandInfo &OpInfo) {
3610 return OpInfo.OperandType == MCOI::OPERAND_MEMORY;
3611 };
3612
3613 const MCInstrDesc &Desc = MI.getDesc();
3614
3615 // Directly invoke the MC-layer routine for real (i.e., non-pseudo)
3616 // instructions (fast case).
3617 if (!X86II::isPseudo(Desc.TSFlags)) {
3618 int MemRefIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
3619 if (MemRefIdx >= 0)
3620 return MemRefIdx + X86II::getOperandBias(Desc);
3621#ifdef EXPENSIVE_CHECKS
3622 assert(none_of(Desc.operands(), IsMemOp) &&
3623 "Got false negative from X86II::getMemoryOperandNo()!");
3624#endif
3625 return -1;
3626 }
3627
3628 // Otherwise, handle pseudo instructions by examining the type of their
3629 // operands (slow case). An instruction cannot have a memory reference if it
3630 // has fewer than AddrNumOperands (= 5) explicit operands.
3631 unsigned NumOps = Desc.getNumOperands();
3632 if (NumOps < X86::AddrNumOperands) {
3633#ifdef EXPENSIVE_CHECKS
3634 assert(none_of(Desc.operands(), IsMemOp) &&
3635 "Expected no operands to have OPERAND_MEMORY type!");
3636#endif
3637 return -1;
3638 }
3639
3640 // The first operand with type OPERAND_MEMORY indicates the start of a memory
3641 // reference. We expect the following AddrNumOperands-1 operands to also have
3642 // OPERAND_MEMORY type.
3643 for (unsigned I = 0, E = NumOps - X86::AddrNumOperands; I != E; ++I) {
3644 if (IsMemOp(Desc.operands()[I])) {
3645#ifdef EXPENSIVE_CHECKS
3646 assert(std::all_of(Desc.operands().begin() + I,
3647 Desc.operands().begin() + I + X86::AddrNumOperands,
3648 IsMemOp) &&
3649 "Expected all five operands in the memory reference to have "
3650 "OPERAND_MEMORY type!");
3651#endif
3652 return I;
3653 }
3654 }
3655
3656 return -1;
3657}
3658
3659const Constant *X86::getConstantFromPool(const MachineInstr &MI,
3660 unsigned OpNo) {
3661 assert(MI.getNumOperands() >= (OpNo + X86::AddrNumOperands) &&
3662 "Unexpected number of operands!");
3663
3664 const MachineOperand &Index = MI.getOperand(OpNo + X86::AddrIndexReg);
3665 if (!Index.isReg() || Index.getReg() != X86::NoRegister)
3666 return nullptr;
3667
3668 const MachineOperand &Disp = MI.getOperand(OpNo + X86::AddrDisp);
3669 if (!Disp.isCPI() || Disp.getOffset() != 0)
3670 return nullptr;
3671
3672 ArrayRef<MachineConstantPoolEntry> Constants =
3673 MI.getParent()->getParent()->getConstantPool()->getConstants();
3674 const MachineConstantPoolEntry &ConstantEntry = Constants[Disp.getIndex()];
3675
3676 // Bail if this is a machine constant pool entry, we won't be able to dig out
3677 // anything useful.
3678 if (ConstantEntry.isMachineConstantPoolEntry())
3679 return nullptr;
3680
3681 return ConstantEntry.Val.ConstVal;
3682}
3683
3685 switch (MI.getOpcode()) {
3686 case X86::TCRETURNdi:
3687 case X86::TCRETURNri:
3688 case X86::TCRETURNmi:
3689 case X86::TCRETURNdi64:
3690 case X86::TCRETURNri64:
3691 case X86::TCRETURNmi64:
3692 return true;
3693 default:
3694 return false;
3695 }
3696}
3697
3698 bool X86InstrInfo::canMakeTailCallConditional(
3699 SmallVectorImpl<MachineOperand> &BranchCond,
3700 const MachineInstr &TailCall) const {
3701
3702 const MachineFunction *MF = TailCall.getMF();
3703
3704 if (MF->getTarget().getCodeModel() == CodeModel::Kernel) {
3705 // The kernel patches thunk calls at runtime, so these should never be conditional.
3706 const MachineOperand &Target = TailCall.getOperand(0);
3707 if (Target.isSymbol()) {
3708 StringRef Symbol(Target.getSymbolName());
3709 // This is currently only relevant to the r11 kernel indirect thunk.
3710 if (Symbol == "__x86_indirect_thunk_r11")
3711 return false;
3712 }
3713 }
3714
3715 if (TailCall.getOpcode() != X86::TCRETURNdi &&
3716 TailCall.getOpcode() != X86::TCRETURNdi64) {
3717 // Only direct calls can be done with a conditional branch.
3718 return false;
3719 }
3720
3721 if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
3722 // Conditional tail calls confuse the Win64 unwinder.
3723 return false;
3724 }
3725
3726 assert(BranchCond.size() == 1);
3727 if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
3728 // Can't make a conditional tail call with this condition.
3729 return false;
3730 }
3731
3732 const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
3733 if (X86FI->getTCReturnAddrDelta() != 0 ||
3734 TailCall.getOperand(1).getImm() != 0) {
3735 // A conditional tail call cannot do any stack adjustment.
3736 return false;
3737 }
3738
3739 return true;
3740}
3741
3742 void X86InstrInfo::replaceBranchWithTailCall(
3743 MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
3744 const MachineInstr &TailCall) const {
3745 assert(canMakeTailCallConditional(BranchCond, TailCall));
3746
3747 MachineBasicBlock::iterator I = MBB.end();
3748 while (I != MBB.begin()) {
3749 --I;
3750 if (I->isDebugInstr())
3751 continue;
3752 if (!I->isBranch())
3753 assert(0 && "Can't find the branch to replace!");
3754
3755 X86::CondCode CC = X86::getCondFromBranch(*I);
3756 assert(BranchCond.size() == 1);
3757 if (CC != BranchCond[0].getImm())
3758 continue;
3759
3760 break;
3761 }
3762
3763 unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
3764 : X86::TCRETURNdi64cc;
3765
3766 auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
3767 MIB->addOperand(TailCall.getOperand(0)); // Destination.
3768 MIB.addImm(0); // Stack offset (not used).
3769 MIB->addOperand(BranchCond[0]); // Condition.
3770 MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
3771
3772 // Add implicit uses and defs of all live regs potentially clobbered by the
3773 // call. This way they still appear live across the call.
3774 LivePhysRegs LiveRegs(getRegisterInfo());
3775 LiveRegs.addLiveOuts(MBB);
3776 SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
3777 LiveRegs.stepForward(*MIB, Clobbers);
3778 for (const auto &C : Clobbers) {
3779 MIB.addReg(C.first, RegState::Implicit);
3780 MIB.addReg(C.first, RegState::Implicit | RegState::Define);
3781 }
3782
3783 I->eraseFromParent();
3784}
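// Illustrative effect of the rewrite above (operands simplified): a terminating
//   JCC_1 %bb.tailcall, 4, implicit $eflags
// whose target merely performs "TCRETURNdi @callee, 0" is replaced in this
// block by
//   TCRETURNdicc @callee, 0, 4, implicit $eflags, ...
// carrying over the tail call's regmask and adding live registers as implicit
// operands so they remain live across the new call.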
3785
3786 // Given an MBB and its TBB, find the FBB which was a fallthrough MBB (it may
3787 // not be a fallthrough MBB now due to layout changes). Return nullptr if the
3788 // fallthrough MBB cannot be identified.
3789 static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
3790 MachineBasicBlock *TBB) {
3791 // Look for non-EHPad successors other than TBB. If we find exactly one, it
3792 // is the fallthrough MBB. If we find zero, then TBB is both the target MBB
3793 // and fallthrough MBB. If we find more than one, we cannot identify the
3794 // fallthrough MBB and should return nullptr.
3795 MachineBasicBlock *FallthroughBB = nullptr;
3796 for (MachineBasicBlock *Succ : MBB->successors()) {
3797 if (Succ->isEHPad() || (Succ == TBB && FallthroughBB))
3798 continue;
3799 // Return a nullptr if we found more than one fallthrough successor.
3800 if (FallthroughBB && FallthroughBB != TBB)
3801 return nullptr;
3802 FallthroughBB = Succ;
3803 }
3804 return FallthroughBB;
3805}
3806
3807bool X86InstrInfo::analyzeBranchImpl(
3808 MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
3809 SmallVectorImpl<MachineOperand> &Cond,
3810 SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
3811
3812 // Start from the bottom of the block and work up, examining the
3813 // terminator instructions.
3814 MachineBasicBlock::iterator I = MBB.end();
3815 MachineBasicBlock::iterator UnCondBrIter = MBB.end();
3816 while (I != MBB.begin()) {
3817 --I;
3818 if (I->isDebugInstr())
3819 continue;
3820
3821 // Working from the bottom, when we see a non-terminator instruction, we're
3822 // done.
3823 if (!isUnpredicatedTerminator(*I))
3824 break;
3825
3826 // A terminator that isn't a branch can't easily be handled by this
3827 // analysis.
3828 if (!I->isBranch())
3829 return true;
3830
3831 // Handle unconditional branches.
3832 if (I->getOpcode() == X86::JMP_1) {
3833 UnCondBrIter = I;
3834
3835 if (!AllowModify) {
3836 TBB = I->getOperand(0).getMBB();
3837 continue;
3838 }
3839
3840 // If the block has any instructions after a JMP, delete them.
3841 MBB.erase(std::next(I), MBB.end());
3842
3843 Cond.clear();
3844 FBB = nullptr;
3845
3846 // Delete the JMP if it's equivalent to a fall-through.
3847 if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
3848 TBB = nullptr;
3849 I->eraseFromParent();
3850 I = MBB.end();
3851 UnCondBrIter = MBB.end();
3852 continue;
3853 }
3854
3855 // TBB is used to indicate the unconditional destination.
3856 TBB = I->getOperand(0).getMBB();
3857 continue;
3858 }
3859
3860 // Handle conditional branches.
3861 X86::CondCode BranchCode = X86::getCondFromBranch(*I);
3862 if (BranchCode == X86::COND_INVALID)
3863 return true; // Can't handle indirect branch.
3864
3865 // In practice we should never have an undef EFLAGS operand; if we do,
3866 // abort here, as we are not prepared to preserve the flag.
3867 if (I->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->isUndef())
3868 return true;
3869
3870 // Working from the bottom, handle the first conditional branch.
3871 if (Cond.empty()) {
3872 FBB = TBB;
3873 TBB = I->getOperand(0).getMBB();
3874 Cond.push_back(MachineOperand::CreateImm(BranchCode));
3875 CondBranches.push_back(&*I);
3876 continue;
3877 }
3878
3879 // Handle subsequent conditional branches. Only handle the case where all
3880 // conditional branches branch to the same destination and their condition
3881 // opcodes fit one of the special multi-branch idioms.
3882 assert(Cond.size() == 1);
3883 assert(TBB);
3884
3885 // If the conditions are the same, we can leave them alone.
3886 X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
3887 auto NewTBB = I->getOperand(0).getMBB();
3888 if (OldBranchCode == BranchCode && TBB == NewTBB)
3889 continue;
3890
3891 // If they differ, see if they fit one of the known patterns. Theoretically,
3892 // we could handle more patterns here, but we shouldn't expect to see them
3893 // if instruction selection has done a reasonable job.
3894 if (TBB == NewTBB &&
3895 ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
3896 (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
3897 BranchCode = X86::COND_NE_OR_P;
3898 } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
3899 (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
3900 if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
3901 return true;
3902
3903 // X86::COND_E_AND_NP usually has two different branch destinations.
3904 //
3905 // JP B1
3906 // JE B2
3907 // JMP B1
3908 // B1:
3909 // B2:
3910 //
3911 // Here this condition branches to B2 only if NP && E. It has another
3912 // equivalent form:
3913 //
3914 // JNE B1
3915 // JNP B2
3916 // JMP B1
3917 // B1:
3918 // B2:
3919 //
3920 // Similarly it branches to B2 only if E && NP. That is why this condition
3921 // is named with COND_E_AND_NP.
3922 BranchCode = X86::COND_E_AND_NP;
3923 } else
3924 return true;
3925
3926 // Update the MachineOperand.
3927 Cond[0].setImm(BranchCode);
3928 CondBranches.push_back(&*I);
3929 }
3930
3931 return false;
3932}
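// As a concrete illustration of the analysis above: for a block that ends in
//   JCC_1 %bb.then, 4, implicit $eflags   ; 4 == X86::COND_E
//   JMP_1 %bb.else
// it reports TBB = %bb.then, FBB = %bb.else and Cond = { COND_E }, while the
// JP/JNE multi-branch idioms are folded into the pseudo conditions
// COND_NE_OR_P and COND_E_AND_NP handled above.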
3933
3934 bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
3935 MachineBasicBlock *&TBB,
3936 MachineBasicBlock *&FBB,
3937 SmallVectorImpl<MachineOperand> &Cond,
3938 bool AllowModify) const {
3939 SmallVector<MachineInstr *, 4> CondBranches;
3940 return analyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
3941}
3942
3943 static int getJumpTableIndexFromAddr(const MachineInstr &MI) {
3944 const MCInstrDesc &Desc = MI.getDesc();
3945 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
3946 assert(MemRefBegin >= 0 && "instr should have memory operand");
3947 MemRefBegin += X86II::getOperandBias(Desc);
3948
3949 const MachineOperand &MO = MI.getOperand(MemRefBegin + X86::AddrDisp);
3950 if (!MO.isJTI())
3951 return -1;
3952
3953 return MO.getIndex();
3954}
3955
3956 static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI,
3957 Register Reg) {
3958 if (!Reg.isVirtual())
3959 return -1;
3960 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
3961 if (MI == nullptr)
3962 return -1;
3963 unsigned Opcode = MI->getOpcode();
3964 if (Opcode != X86::LEA64r && Opcode != X86::LEA32r)
3965 return -1;
3966 return getJumpTableIndexFromAddr(*MI);
3967}
3968
3969 int X86InstrInfo::getJumpTableIndex(const MachineInstr &MI) const {
3970 unsigned Opcode = MI.getOpcode();
3971 // Switch-jump pattern for non-PIC code looks like:
3972 // JMP64m $noreg, 8, %X, %jump-table.X, $noreg
3973 if (Opcode == X86::JMP64m || Opcode == X86::JMP32m) {
3974 return getJumpTableIndexFromAddr(MI);
3975 }
3976 // The pattern for PIC code looks like:
3977 // %0 = LEA64r $rip, 1, $noreg, %jump-table.X
3978 // %1 = MOVSX64rm32 %0, 4, XX, 0, $noreg
3979 // %2 = ADD64rr %1, %0
3980 // JMP64r %2
3981 if (Opcode == X86::JMP64r || Opcode == X86::JMP32r) {
3982 Register Reg = MI.getOperand(0).getReg();
3983 if (!Reg.isVirtual())
3984 return -1;
3985 const MachineFunction &MF = *MI.getParent()->getParent();
3986 const MachineRegisterInfo &MRI = MF.getRegInfo();
3987 MachineInstr *Add = MRI.getUniqueVRegDef(Reg);
3988 if (Add == nullptr)
3989 return -1;
3990 if (Add->getOpcode() != X86::ADD64rr && Add->getOpcode() != X86::ADD32rr)
3991 return -1;
3992 int JTI1 = getJumpTableIndexFromReg(MRI, Add->getOperand(1).getReg());
3993 if (JTI1 >= 0)
3994 return JTI1;
3995 int JTI2 = getJumpTableIndexFromReg(MRI, Add->getOperand(2).getReg());
3996 if (JTI2 >= 0)
3997 return JTI2;
3998 }
3999 return -1;
4000}
4001
4002 bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
4003 MachineBranchPredicate &MBP,
4004 bool AllowModify) const {
4005 using namespace std::placeholders;
4006
4007 SmallVector<MachineOperand, 4> Cond;
4008 SmallVector<MachineInstr *, 4> CondBranches;
4009 if (analyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
4010 AllowModify))
4011 return true;
4012
4013 if (Cond.size() != 1)
4014 return true;
4015
4016 assert(MBP.TrueDest && "expected!");
4017
4018 if (!MBP.FalseDest)
4019 MBP.FalseDest = MBB.getNextNode();
4020
4021 const TargetRegisterInfo *TRI = &getRegisterInfo();
4022
4023 MachineInstr *ConditionDef = nullptr;
4024 bool SingleUseCondition = true;
4025
4026 for (MachineInstr &MI : reverse(MBB)) {
4027 if (MI.modifiesRegister(X86::EFLAGS, TRI)) {
4028 ConditionDef = &MI;
4029 break;
4030 }
4031
4032 if (MI.readsRegister(X86::EFLAGS, TRI))
4033 SingleUseCondition = false;
4034 }
4035
4036 if (!ConditionDef)
4037 return true;
4038
4039 if (SingleUseCondition) {
4040 for (auto *Succ : MBB.successors())
4041 if (Succ->isLiveIn(X86::EFLAGS))
4042 SingleUseCondition = false;
4043 }
4044
4045 MBP.ConditionDef = ConditionDef;
4046 MBP.SingleUseCondition = SingleUseCondition;
4047
4048 // Currently we only recognize the simple pattern:
4049 //
4050 // test %reg, %reg
4051 // je %label
4052 //
4053 const unsigned TestOpcode =
4054 Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
4055
4056 if (ConditionDef->getOpcode() == TestOpcode &&
4057 ConditionDef->getNumOperands() == 3 &&
4058 ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
4059 (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
4060 MBP.LHS = ConditionDef->getOperand(0);
4061 MBP.RHS = MachineOperand::CreateImm(0);
4062 MBP.Predicate = Cond[0].getImm() == X86::COND_NE
4063 ? MachineBranchPredicate::PRED_NE
4064 : MachineBranchPredicate::PRED_EQ;
4065 return false;
4066 }
4067
4068 return true;
4069}
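// For the recognized pattern, e.g.
//   TEST64rr %0, %0, implicit-def $eflags
//   JCC_1 %bb.taken, 4, implicit $eflags    ; COND_E
// the predicate is reported as "%0 == 0" (PRED_EQ) with TrueDest = %bb.taken,
// and COND_NE would instead yield PRED_NE; any other flag producer makes this
// hook conservatively return true (i.e. "cannot analyze").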
4070
4071 unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
4072 int *BytesRemoved) const {
4073 assert(!BytesRemoved && "code size not handled");
4074
4075 MachineBasicBlock::iterator I = MBB.end();
4076 unsigned Count = 0;
4077
4078 while (I != MBB.begin()) {
4079 --I;
4080 if (I->isDebugInstr())
4081 continue;
4082 if (I->getOpcode() != X86::JMP_1 &&
4083 X86::getCondFromBranch(*I) == X86::COND_INVALID)
4084 break;
4085 // Remove the branch.
4086 I->eraseFromParent();
4087 I = MBB.end();
4088 ++Count;
4089 }
4090
4091 return Count;
4092}
4093
4094 unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
4095 MachineBasicBlock *TBB,
4096 MachineBasicBlock *FBB,
4097 ArrayRef<MachineOperand> Cond,
4098 const DebugLoc &DL, int *BytesAdded) const {
4099 // Shouldn't be a fall through.
4100 assert(TBB && "insertBranch must not be told to insert a fallthrough");
4101 assert((Cond.size() == 1 || Cond.size() == 0) &&
4102 "X86 branch conditions have one component!");
4103 assert(!BytesAdded && "code size not handled");
4104
4105 if (Cond.empty()) {
4106 // Unconditional branch?
4107 assert(!FBB && "Unconditional branch with multiple successors!");
4108 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
4109 return 1;
4110 }
4111
4112 // If FBB is null, it is implied to be a fall-through block.
4113 bool FallThru = FBB == nullptr;
4114
4115 // Conditional branch.
4116 unsigned Count = 0;
4117 X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
4118 switch (CC) {
4119 case X86::COND_NE_OR_P:
4120 // Synthesize NE_OR_P with two branches.
4121 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE);
4122 ++Count;
4123 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P);
4124 ++Count;
4125 break;
4126 case X86::COND_E_AND_NP:
4127 // Use the next block of MBB as FBB if it is null.
4128 if (FBB == nullptr) {
4129 FBB = getFallThroughMBB(&MBB, TBB);
4130 assert(FBB && "MBB cannot be the last block in function when the false "
4131 "body is a fall-through.");
4132 }
4133 // Synthesize COND_E_AND_NP with two branches.
4134 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE);
4135 ++Count;
4136 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP);
4137 ++Count;
4138 break;
4139 default: {
4140 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC);
4141 ++Count;
4142 }
4143 }
4144 if (!FallThru) {
4145 // Two-way Conditional branch. Insert the second branch.
4146 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
4147 ++Count;
4148 }
4149 return Count;
4150}
4151
4152 bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
4153 ArrayRef<MachineOperand> Cond,
4154 Register DstReg, Register TrueReg,
4155 Register FalseReg, int &CondCycles,
4156 int &TrueCycles, int &FalseCycles) const {
4157 // Not all subtargets have cmov instructions.
4158 if (!Subtarget.canUseCMOV())
4159 return false;
4160 if (Cond.size() != 1)
4161 return false;
4162 // We cannot do the composite conditions, at least not in SSA form.
4163 if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND)
4164 return false;
4165
4166 // Check register classes.
4167 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4168 const TargetRegisterClass *RC =
4169 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
4170 if (!RC)
4171 return false;
4172
4173 // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
4174 if (X86::GR16RegClass.hasSubClassEq(RC) ||
4175 X86::GR32RegClass.hasSubClassEq(RC) ||
4176 X86::GR64RegClass.hasSubClassEq(RC)) {
4177 // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
4178 // Bridge. Probably Ivy Bridge as well.
4179 CondCycles = 2;
4180 TrueCycles = 2;
4181 FalseCycles = 2;
4182 return true;
4183 }
4184
4185 // Can't do vectors.
4186 return false;
4187}
4188
4189 void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
4190 MachineBasicBlock::iterator I,
4191 const DebugLoc &DL, Register DstReg,
4192 ArrayRef<MachineOperand> Cond, Register TrueReg,
4193 Register FalseReg) const {
4194 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4195 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
4196 const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
4197 assert(Cond.size() == 1 && "Invalid Cond array");
4198 unsigned Opc =
4199 X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
4200 false /*HasMemoryOperand*/, Subtarget.hasNDD());
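// Note on operand order: CMOVcc is built with FalseReg as the tied first
// source and TrueReg as the second, so DstReg receives TrueReg when the
// condition in Cond[0] holds and FalseReg otherwise.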
4201 BuildMI(MBB, I, DL, get(Opc), DstReg)
4202 .addReg(FalseReg)
4203 .addReg(TrueReg)
4204 .addImm(Cond[0].getImm());
4205}
4206
4207/// Test if the given register is a physical h register.
4208static bool isHReg(unsigned Reg) {
4209 return X86::GR8_ABCD_HRegClass.contains(Reg);
4210}
4211
4212// Try and copy between VR128/VR64 and GR64 registers.
4213static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
4214 const X86Subtarget &Subtarget) {
4215 bool HasAVX = Subtarget.hasAVX();
4216 bool HasAVX512 = Subtarget.hasAVX512();
4217 bool HasEGPR = Subtarget.hasEGPR();
4218
4219 // SrcReg(MaskReg) -> DestReg(GR64)
4220 // SrcReg(MaskReg) -> DestReg(GR32)
4221
4222 // All KMASK RegClasses hold the same k registers, so we can test against
4223 // any one of them.
4224 if (X86::VK16RegClass.contains(SrcReg)) {
4225 if (X86::GR64RegClass.contains(DestReg)) {
4226 assert(Subtarget.hasBWI());
4227 return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
4228 }
4229 if (X86::GR32RegClass.contains(DestReg))
4230 return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
4231 : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
4232 }
4233
4234 // SrcReg(GR64) -> DestReg(MaskReg)
4235 // SrcReg(GR32) -> DestReg(MaskReg)
4236
4237 // All KMASK RegClasses hold the same k registers, so we can test against
4238 // any one of them.
4239 if (X86::VK16RegClass.contains(DestReg)) {
4240 if (X86::GR64RegClass.contains(SrcReg)) {
4241 assert(Subtarget.hasBWI());
4242 return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
4243 }
4244 if (X86::GR32RegClass.contains(SrcReg))
4245 return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
4246 : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
4247 }
4248
4249 // SrcReg(VR128) -> DestReg(GR64)
4250 // SrcReg(VR64) -> DestReg(GR64)
4251 // SrcReg(GR64) -> DestReg(VR128)
4252 // SrcReg(GR64) -> DestReg(VR64)
4253
4254 if (X86::GR64RegClass.contains(DestReg)) {
4255 if (X86::VR128XRegClass.contains(SrcReg))
4256 // Copy from a VR128 register to a GR64 register.
4257 return HasAVX512 ? X86::VMOVPQIto64Zrr
4258 : HasAVX ? X86::VMOVPQIto64rr
4259 : X86::MOVPQIto64rr;
4260 if (X86::VR64RegClass.contains(SrcReg))
4261 // Copy from a VR64 register to a GR64 register.
4262 return X86::MMX_MOVD64from64rr;
4263 } else if (X86::GR64RegClass.contains(SrcReg)) {
4264 // Copy from a GR64 register to a VR128 register.
4265 if (X86::VR128XRegClass.contains(DestReg))
4266 return HasAVX512 ? X86::VMOV64toPQIZrr
4267 : HasAVX ? X86::VMOV64toPQIrr
4268 : X86::MOV64toPQIrr;
4269 // Copy from a GR64 register to a VR64 register.
4270 if (X86::VR64RegClass.contains(DestReg))
4271 return X86::MMX_MOVD64to64rr;
4272 }
4273
4274 // SrcReg(VR128) -> DestReg(GR32)
4275 // SrcReg(GR32) -> DestReg(VR128)
4276
4277 if (X86::GR32RegClass.contains(DestReg) &&
4278 X86::VR128XRegClass.contains(SrcReg))
4279 // Copy from a VR128 register to a GR32 register.
4280 return HasAVX512 ? X86::VMOVPDI2DIZrr
4281 : HasAVX ? X86::VMOVPDI2DIrr
4282 : X86::MOVPDI2DIrr;
4283
4284 if (X86::VR128XRegClass.contains(DestReg) &&
4285 X86::GR32RegClass.contains(SrcReg))
4286 // Copy from a GR32 register to a VR128 register.
4287 return HasAVX512 ? X86::VMOVDI2PDIZrr
4288 : HasAVX ? X86::VMOVDI2PDIrr
4289 : X86::MOVDI2PDIrr;
4290 return 0;
4291}
4292
4293 void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4294 MachineBasicBlock::iterator MI,
4295 const DebugLoc &DL, MCRegister DestReg,
4296 MCRegister SrcReg, bool KillSrc) const {
4297 // First deal with the normal symmetric copies.
4298 bool HasAVX = Subtarget.hasAVX();
4299 bool HasVLX = Subtarget.hasVLX();
4300 bool HasEGPR = Subtarget.hasEGPR();
4301 unsigned Opc = 0;
4302 if (X86::GR64RegClass.contains(DestReg, SrcReg))
4303 Opc = X86::MOV64rr;
4304 else if (X86::GR32RegClass.contains(DestReg, SrcReg))
4305 Opc = X86::MOV32rr;
4306 else if (X86::GR16RegClass.contains(DestReg, SrcReg))
4307 Opc = X86::MOV16rr;
4308 else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
4309 // Copying to or from a physical H register on x86-64 requires a NOREX
4310 // move. Otherwise use a normal move.
4311 if ((isHReg(DestReg) || isHReg(SrcReg)) && Subtarget.is64Bit()) {
4312 Opc = X86::MOV8rr_NOREX;
4313 // Both operands must be encodable without a REX prefix.
4314 assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
4315 "8-bit H register can not be copied outside GR8_NOREX");
4316 } else
4317 Opc = X86::MOV8rr;
4318 } else if (X86::VR64RegClass.contains(DestReg, SrcReg))
4319 Opc = X86::MMX_MOVQ64rr;
4320 else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
4321 if (HasVLX)
4322 Opc = X86::VMOVAPSZ128rr;
4323 else if (X86::VR128RegClass.contains(DestReg, SrcReg))
4324 Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
4325 else {
4326 // If this is an extended register and we don't have VLX we need to use a
4327 // 512-bit move.
4328 Opc = X86::VMOVAPSZrr;
4329 const TargetRegisterInfo *TRI = &getRegisterInfo();
4330 DestReg =
4331 TRI->getMatchingSuperReg(DestReg, X86::sub_xmm, &X86::VR512RegClass);
4332 SrcReg =
4333 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
4334 }
4335 } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
4336 if (HasVLX)
4337 Opc = X86::VMOVAPSZ256rr;
4338 else if (X86::VR256RegClass.contains(DestReg, SrcReg))
4339 Opc = X86::VMOVAPSYrr;
4340 else {
4341 // If this is an extended register and we don't have VLX we need to use a
4342 // 512-bit move.
4343 Opc = X86::VMOVAPSZrr;
4344 const TargetRegisterInfo *TRI = &getRegisterInfo();
4345 DestReg =
4346 TRI->getMatchingSuperReg(DestReg, X86::sub_ymm, &X86::VR512RegClass);
4347 SrcReg =
4348 TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
4349 }
4350 } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
4351 Opc = X86::VMOVAPSZrr;
4352 // All KMASK RegClasses hold the same k registers, so we can test against
4353 // any one of them.
4354 else if (X86::VK16RegClass.contains(DestReg, SrcReg))
4355 Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
4356 : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
4357 if (!Opc)
4358 Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
4359
4360 if (Opc) {
4361 BuildMI(MBB, MI, DL, get(Opc), DestReg)
4362 .addReg(SrcReg, getKillRegState(KillSrc));
4363 return;
4364 }
4365
4366 if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
4367 // FIXME: We use a fatal error here because historically LLVM has tried
4368 // lower some of these physreg copies and we want to ensure we get
4369 // reasonable bug reports if someone encounters a case no other testing
4370 // found. This path should be removed after the LLVM 7 release.
4371 report_fatal_error("Unable to copy EFLAGS physical register!");
4372 }
4373
4374 LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to "
4375 << RI.getName(DestReg) << '\n');
4376 report_fatal_error("Cannot emit physreg copy instruction");
4377}
4378
4379std::optional<DestSourcePair>
4380 X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
4381 if (MI.isMoveReg()) {
4382 // FIXME: Dirty hack for apparent invariant that doesn't hold when
4383 // subreg_to_reg is coalesced with ordinary copies, such that the bits that
4384 // were asserted as 0 are now undef.
4385 if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
4386 return std::nullopt;
4387
4388 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
4389 }
4390 return std::nullopt;
4391}
4392
4393static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI) {
4394 if (STI.hasFP16())
4395 return Load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
4396 if (Load)
4397 return STI.hasAVX512() ? X86::VMOVSSZrm
4398 : STI.hasAVX() ? X86::VMOVSSrm
4399 : X86::MOVSSrm;
4400 else
4401 return STI.hasAVX512() ? X86::VMOVSSZmr
4402 : STI.hasAVX() ? X86::VMOVSSmr
4403 : X86::MOVSSmr;
4404}
4405
4406 static unsigned getLoadStoreRegOpcode(Register Reg,
4407 const TargetRegisterClass *RC,
4408 bool IsStackAligned,
4409 const X86Subtarget &STI, bool Load) {
4410 bool HasAVX = STI.hasAVX();
4411 bool HasAVX512 = STI.hasAVX512();
4412 bool HasVLX = STI.hasVLX();
4413 bool HasEGPR = STI.hasEGPR();
4414
4415 assert(RC != nullptr && "Invalid target register class");
4416 switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
4417 default:
4418 llvm_unreachable("Unknown spill size");
4419 case 1:
4420 assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
4421 if (STI.is64Bit())
4422 // Copying to or from a physical H register on x86-64 requires a NOREX
4423 // move. Otherwise use a normal move.
4424 if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
4425 return Load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
4426 return Load ? X86::MOV8rm : X86::MOV8mr;
4427 case 2:
4428 if (X86::VK16RegClass.hasSubClassEq(RC))
4429 return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
4430 : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
4431 assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
4432 return Load ? X86::MOV16rm : X86::MOV16mr;
4433 case 4:
4434 if (X86::GR32RegClass.hasSubClassEq(RC))
4435 return Load ? X86::MOV32rm : X86::MOV32mr;
4436 if (X86::FR32XRegClass.hasSubClassEq(RC))
4437 return Load ? (HasAVX512 ? X86::VMOVSSZrm_alt
4438 : HasAVX ? X86::VMOVSSrm_alt
4439 : X86::MOVSSrm_alt)
4440 : (HasAVX512 ? X86::VMOVSSZmr
4441 : HasAVX ? X86::VMOVSSmr
4442 : X86::MOVSSmr);
4443 if (X86::RFP32RegClass.hasSubClassEq(RC))
4444 return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
4445 if (X86::VK32RegClass.hasSubClassEq(RC)) {
4446 assert(STI.hasBWI() && "KMOVD requires BWI");
4447 return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
4448 : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
4449 }
4450 // All of these mask pair classes have the same spill size, so the same kind
4451 // of kmov instructions can be used with all of them.
4452 if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
4453 X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
4454 X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
4455 X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
4456 X86::VK16PAIRRegClass.hasSubClassEq(RC))
4457 return Load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
4458 if (X86::FR16RegClass.hasSubClassEq(RC) ||
4459 X86::FR16XRegClass.hasSubClassEq(RC))
4460 return getLoadStoreOpcodeForFP16(Load, STI);
4461 llvm_unreachable("Unknown 4-byte regclass");
4462 case 8:
4463 if (X86::GR64RegClass.hasSubClassEq(RC))
4464 return Load ? X86::MOV64rm : X86::MOV64mr;
4465 if (X86::FR64XRegClass.hasSubClassEq(RC))
4466 return Load ? (HasAVX512 ? X86::VMOVSDZrm_alt
4467 : HasAVX ? X86::VMOVSDrm_alt
4468 : X86::MOVSDrm_alt)
4469 : (HasAVX512 ? X86::VMOVSDZmr
4470 : HasAVX ? X86::VMOVSDmr
4471 : X86::MOVSDmr);
4472 if (X86::VR64RegClass.hasSubClassEq(RC))
4473 return Load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
4474 if (X86::RFP64RegClass.hasSubClassEq(RC))
4475 return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
4476 if (X86::VK64RegClass.hasSubClassEq(RC)) {
4477 assert(STI.hasBWI() && "KMOVQ requires BWI");
4478 return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
4479 : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
4480 }
4481 llvm_unreachable("Unknown 8-byte regclass");
4482 case 10:
4483 assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
4484 return Load ? X86::LD_Fp80m : X86::ST_FpP80m;
4485 case 16: {
4486 if (X86::VR128XRegClass.hasSubClassEq(RC)) {
4487 // If stack is realigned we can use aligned stores.
4488 if (IsStackAligned)
4489 return Load ? (HasVLX ? X86::VMOVAPSZ128rm
4490 : HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX
4491 : HasAVX ? X86::VMOVAPSrm
4492 : X86::MOVAPSrm)
4493 : (HasVLX ? X86::VMOVAPSZ128mr
4494 : HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX
4495 : HasAVX ? X86::VMOVAPSmr
4496 : X86::MOVAPSmr);
4497 else
4498 return Load ? (HasVLX ? X86::VMOVUPSZ128rm
4499 : HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX
4500 : HasAVX ? X86::VMOVUPSrm
4501 : X86::MOVUPSrm)
4502 : (HasVLX ? X86::VMOVUPSZ128mr
4503 : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX
4504 : HasAVX ? X86::VMOVUPSmr
4505 : X86::MOVUPSmr);
4506 }
4507 llvm_unreachable("Unknown 16-byte regclass");
4508 }
4509 case 32:
4510 assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
4511 // If stack is realigned we can use aligned stores.
4512 if (IsStackAligned)
4513 return Load ? (HasVLX ? X86::VMOVAPSZ256rm
4514 : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
4515 : X86::VMOVAPSYrm)
4516 : (HasVLX ? X86::VMOVAPSZ256mr
4517 : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
4518 : X86::VMOVAPSYmr);
4519 else
4520 return Load ? (HasVLX ? X86::VMOVUPSZ256rm
4521 : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
4522 : X86::VMOVUPSYrm)
4523 : (HasVLX ? X86::VMOVUPSZ256mr
4524 : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
4525 : X86::VMOVUPSYmr);
4526 case 64:
4527 assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
4528 assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
4529 if (IsStackAligned)
4530 return Load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
4531 else
4532 return Load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
4533 case 1024:
4534 assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
4535 assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
4536#define GET_EGPR_IF_ENABLED(OPC) (STI.hasEGPR() ? OPC##_EVEX : OPC)
4537 return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
4538 : GET_EGPR_IF_ENABLED(X86::TILESTORED);
4539#undef GET_EGPR_IF_ENABLED
4540 }
4541}
4542
4543std::optional<ExtAddrMode>
4544 X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
4545 const TargetRegisterInfo *TRI) const {
4546 const MCInstrDesc &Desc = MemI.getDesc();
4547 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
4548 if (MemRefBegin < 0)
4549 return std::nullopt;
4550
4551 MemRefBegin += X86II::getOperandBias(Desc);
4552
4553 auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
4554 if (!BaseOp.isReg()) // Can be an MO_FrameIndex
4555 return std::nullopt;
4556
4557 const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
4558 // Displacement can be symbolic
4559 if (!DispMO.isImm())
4560 return std::nullopt;
4561
4562 ExtAddrMode AM;
4563 AM.BaseReg = BaseOp.getReg();
4564 AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg();
4565 AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
4566 AM.Displacement = DispMO.getImm();
4567 return AM;
4568}
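// For instance, for a load such as
//   %val:gr32 = MOV32rm %base, 4, %idx, 16, $noreg
// the returned ExtAddrMode has BaseReg = %base, ScaledReg = %idx, Scale = 4 and
// Displacement = 16; frame-index bases and symbolic displacements yield
// std::nullopt instead.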
4569
4570 bool X86InstrInfo::verifyInstruction(const MachineInstr &MI,
4571 StringRef &ErrInfo) const {
4572 std::optional<ExtAddrMode> AMOrNone = getAddrModeFromMemoryOp(MI, nullptr);
4573 if (!AMOrNone)
4574 return true;
4575
4576 ExtAddrMode AM = *AMOrNone;
4578 if (AM.ScaledReg != X86::NoRegister) {
4579 switch (AM.Scale) {
4580 case 1:
4581 case 2:
4582 case 4:
4583 case 8:
4584 break;
4585 default:
4586 ErrInfo = "Scale factor in address must be 1, 2, 4 or 8";
4587 return false;
4588 }
4589 }
4590 if (!isInt<32>(AM.Displacement)) {
4591 ErrInfo = "Displacement in address must fit into 32-bit signed "
4592 "integer";
4593 return false;
4594 }
4595
4596 return true;
4597}
4598
4599 bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
4600 const Register Reg,
4601 int64_t &ImmVal) const {
4602 Register MovReg = Reg;
4603 const MachineInstr *MovMI = &MI;
4604
4605 // Follow use-def for SUBREG_TO_REG to find the real move immediate
4606 // instruction. It is quite common for x86-64.
4607 if (MI.isSubregToReg()) {
4608 // We use the following pattern to set up a 64-bit immediate.
4609 // %8:gr32 = MOV32r0 implicit-def dead $eflags
4610 // %6:gr64 = SUBREG_TO_REG 0, killed %8:gr32, %subreg.sub_32bit
4611 if (!MI.getOperand(1).isImm())
4612 return false;
4613 unsigned FillBits = MI.getOperand(1).getImm();
4614 unsigned SubIdx = MI.getOperand(3).getImm();
4615 MovReg = MI.getOperand(2).getReg();
4616 if (SubIdx != X86::sub_32bit || FillBits != 0)
4617 return false;
4618 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4619 MovMI = MRI.getUniqueVRegDef(MovReg);
4620 if (!MovMI)
4621 return false;
4622 }
4623
4624 if (MovMI->getOpcode() == X86::MOV32r0 &&
4625 MovMI->getOperand(0).getReg() == MovReg) {
4626 ImmVal = 0;
4627 return true;
4628 }
4629
4630 if (MovMI->getOpcode() != X86::MOV32ri &&
4631 MovMI->getOpcode() != X86::MOV64ri &&
4632 MovMI->getOpcode() != X86::MOV32ri64 && MovMI->getOpcode() != X86::MOV8ri)
4633 return false;
4634 // Mov Src can be a global address.
4635 if (!MovMI->getOperand(1).isImm() || MovMI->getOperand(0).getReg() != MovReg)
4636 return false;
4637 ImmVal = MovMI->getOperand(1).getImm();
4638 return true;
4639}
4640
4641 bool X86InstrInfo::preservesZeroValueInReg(
4642 const MachineInstr *MI, const Register NullValueReg,
4643 const TargetRegisterInfo *TRI) const {
4644 if (!MI->modifiesRegister(NullValueReg, TRI))
4645 return true;
4646 switch (MI->getOpcode()) {
4647 // Shifting a null value right/left by any amount leaves it null, e.g.
4648 // rax = shl rax, X.
4649 case X86::SHR64ri:
4650 case X86::SHR32ri:
4651 case X86::SHL64ri:
4652 case X86::SHL32ri:
4653 assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() &&
4654 "expected for shift opcode!");
4655 return MI->getOperand(0).getReg() == NullValueReg &&
4656 MI->getOperand(1).getReg() == NullValueReg;
4657 // Zero extend of a sub-reg of NullValueReg into itself does not change the
4658 // null value.
4659 case X86::MOV32rr:
4660 return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) {
4661 return TRI->isSubRegisterEq(NullValueReg, MO.getReg());
4662 });
4663 default:
4664 return false;
4665 }
4666 llvm_unreachable("Should be handled above!");
4667}
4668
4669 bool X86InstrInfo::getMemOperandsWithOffsetWidth(
4670 const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
4671 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
4672 const TargetRegisterInfo *TRI) const {
4673 const MCInstrDesc &Desc = MemOp.getDesc();
4674 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
4675 if (MemRefBegin < 0)
4676 return false;
4677
4678 MemRefBegin += X86II::getOperandBias(Desc);
4679
4680 const MachineOperand *BaseOp =
4681 &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
4682 if (!BaseOp->isReg()) // Can be an MO_FrameIndex
4683 return false;
4684
4685 if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
4686 return false;
4687
4688 if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
4689 X86::NoRegister)
4690 return false;
4691
4692 const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
4693
4694 // Displacement can be symbolic
4695 if (!DispMO.isImm())
4696 return false;
4697
4698 Offset = DispMO.getImm();
4699
4700 if (!BaseOp->isReg())
4701 return false;
4702
4703 OffsetIsScalable = false;
4704 // FIXME: Relying on memoperands() may not be the right thing to do here. Check
4705 // with X86 maintainers, and fix it accordingly. For now, it is ok, since
4706 // there is no use of `Width` for X86 back-end at the moment.
4707 Width =
4708 !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
4709 BaseOps.push_back(BaseOp);
4710 return true;
4711}
4712
4713static unsigned getStoreRegOpcode(Register SrcReg,
4714 const TargetRegisterClass *RC,
4715 bool IsStackAligned,
4716 const X86Subtarget &STI) {
4717 return getLoadStoreRegOpcode(SrcReg, RC, IsStackAligned, STI, false);
4718}
4719
4720static unsigned getLoadRegOpcode(Register DestReg,
4721 const TargetRegisterClass *RC,
4722 bool IsStackAligned, const X86Subtarget &STI) {
4723 return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
4724}
4725
4726static bool isAMXOpcode(unsigned Opc) {
4727 switch (Opc) {
4728 default:
4729 return false;
4730 case X86::TILELOADD:
4731 case X86::TILESTORED:
4732 case X86::TILELOADD_EVEX:
4733 case X86::TILESTORED_EVEX:
4734 return true;
4735 }
4736}
4737
4738 void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
4739 MachineBasicBlock::iterator MI,
4740 unsigned Opc, Register Reg, int FrameIdx,
4741 bool isKill) const {
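// TILELOADD/TILESTORED address tile data through a scaled-index memory operand
// whose index register supplies the stride between tile rows. Spill slots for
// tiles use a fixed 64-byte row stride, so both cases below materialize the
// constant 64 into a fresh GR64 vreg and patch it into the AddrIndexReg slot of
// the frame-indexed memory reference.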
4742 switch (Opc) {
4743 default:
4744 llvm_unreachable("Unexpected special opcode!");
4745 case X86::TILESTORED:
4746 case X86::TILESTORED_EVEX: {
4747 // tilestored %tmm, (%sp, %idx)
4748 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
4749 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
4750 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
4751 MachineInstr *NewMI =
4752 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
4753 .addReg(Reg, getKillRegState(isKill));
4754 MachineOperand &MO = NewMI->getOperand(X86::AddrIndexReg);
4755 MO.setReg(VirtReg);
4756 MO.setIsKill(true);
4757 break;
4758 }
4759 case X86::TILELOADD:
4760 case X86::TILELOADD_EVEX: {
4761 // tileloadd (%sp, %idx), %tmm
4762 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
4763 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
4764 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
4765 MachineInstr *NewMI = addFrameReference(
4766 BuildMI(MBB, MI, DebugLoc(), get(Opc), Reg), FrameIdx);
4767 MachineOperand &MO = NewMI->getOperand(1 + X86::AddrIndexReg);
4768 MO.setReg(VirtReg);
4769 MO.setIsKill(true);
4770 break;
4771 }
4772 }
4773}
4774
4775 void X86InstrInfo::storeRegToStackSlot(
4776 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
4777 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
4778 const TargetRegisterInfo *TRI, Register VReg) const {
4779 const MachineFunction &MF = *MBB.getParent();
4780 const MachineFrameInfo &MFI = MF.getFrameInfo();
4781 assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
4782 "Stack slot too small for store");
4783
4784 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
4785 bool isAligned =
4786 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
4787 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
4788
4789 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
4790 if (isAMXOpcode(Opc))
4791 loadStoreTileReg(MBB, MI, Opc, SrcReg, FrameIdx, isKill);
4792 else
4793 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
4794 .addReg(SrcReg, getKillRegState(isKill));
4795}
4796
4797 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4798 MachineBasicBlock::iterator MI,
4799 Register DestReg, int FrameIdx,
4800 const TargetRegisterClass *RC,
4801 const TargetRegisterInfo *TRI,
4802 Register VReg) const {
4803 const MachineFunction &MF = *MBB.getParent();
4804 const MachineFrameInfo &MFI = MF.getFrameInfo();
4805 assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
4806 "Load size exceeds stack slot");
4807 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
4808 bool isAligned =
4809 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
4810 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
4811
4812 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
4813 if (isAMXOpcode(Opc))
4814 loadStoreTileReg(MBB, MI, Opc, DestReg, FrameIdx);
4815 else
4816 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
4817 FrameIdx);
4818}
4819
4820 bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
4821 Register &SrcReg2, int64_t &CmpMask,
4822 int64_t &CmpValue) const {
4823 switch (MI.getOpcode()) {
4824 default:
4825 break;
4826 case X86::CMP64ri32:
4827 case X86::CMP32ri:
4828 case X86::CMP16ri:
4829 case X86::CMP8ri:
4830 SrcReg = MI.getOperand(0).getReg();
4831 SrcReg2 = 0;
4832 if (MI.getOperand(1).isImm()) {
4833 CmpMask = ~0;
4834 CmpValue = MI.getOperand(1).getImm();
4835 } else {
4836 CmpMask = CmpValue = 0;
4837 }
4838 return true;
4839 // A SUB can be used to perform comparison.
4840 CASE_ND(SUB64rm)
4841 CASE_ND(SUB32rm)
4842 CASE_ND(SUB16rm)
4843 CASE_ND(SUB8rm)
4844 SrcReg = MI.getOperand(1).getReg();
4845 SrcReg2 = 0;
4846 CmpMask = 0;
4847 CmpValue = 0;
4848 return true;
4849 CASE_ND(SUB64rr)
4850 CASE_ND(SUB32rr)
4851 CASE_ND(SUB16rr)
4852 CASE_ND(SUB8rr)
4853 SrcReg = MI.getOperand(1).getReg();
4854 SrcReg2 = MI.getOperand(2).getReg();
4855 CmpMask = 0;
4856 CmpValue = 0;
4857 return true;
4858 CASE_ND(SUB64ri32)
4859 CASE_ND(SUB32ri)
4860 CASE_ND(SUB16ri)
4861 CASE_ND(SUB8ri)
4862 SrcReg = MI.getOperand(1).getReg();
4863 SrcReg2 = 0;
4864 if (MI.getOperand(2).isImm()) {
4865 CmpMask = ~0;
4866 CmpValue = MI.getOperand(2).getImm();
4867 } else {
4868 CmpMask = CmpValue = 0;
4869 }
4870 return true;
4871 case X86::CMP64rr:
4872 case X86::CMP32rr:
4873 case X86::CMP16rr:
4874 case X86::CMP8rr:
4875 SrcReg = MI.getOperand(0).getReg();
4876 SrcReg2 = MI.getOperand(1).getReg();
4877 CmpMask = 0;
4878 CmpValue = 0;
4879 return true;
4880 case X86::TEST8rr:
4881 case X86::TEST16rr:
4882 case X86::TEST32rr:
4883 case X86::TEST64rr:
4884 SrcReg = MI.getOperand(0).getReg();
4885 if (MI.getOperand(1).getReg() != SrcReg)
4886 return false;
4887 // Compare against zero.
4888 SrcReg2 = 0;
4889 CmpMask = ~0;
4890 CmpValue = 0;
4891 return true;
4892 }
4893 return false;
4894}
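// Examples of the decomposition above:
//   CMP32ri %0, 5       -> SrcReg = %0, SrcReg2 = 0, CmpMask = ~0, CmpValue = 5
//   SUB64rr %1, %0, %2  -> SrcReg = %0, SrcReg2 = %2, CmpMask = CmpValue = 0
//   TEST32rr %0, %0     -> SrcReg = %0, SrcReg2 = 0, CmpMask = ~0, CmpValue = 0
// (TEST with two different registers is rejected, since it is not a compare
// of a single value against zero.)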
4895
4896bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
4897 Register SrcReg, Register SrcReg2,
4898 int64_t ImmMask, int64_t ImmValue,
4899 const MachineInstr &OI, bool *IsSwapped,
4900 int64_t *ImmDelta) const {
4901 switch (OI.getOpcode()) {
4902 case X86::CMP64rr:
4903 case X86::CMP32rr:
4904 case X86::CMP16rr:
4905 case X86::CMP8rr:
4906 CASE_ND(SUB64rr)
4907 CASE_ND(SUB32rr)
4908 CASE_ND(SUB16rr)
4909 CASE_ND(SUB8rr) {
4910 Register OISrcReg;
4911 Register OISrcReg2;
4912 int64_t OIMask;
4913 int64_t OIValue;
4914 if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
4915 OIMask != ImmMask || OIValue != ImmValue)
4916 return false;
4917 if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
4918 *IsSwapped = false;
4919 return true;
4920 }
4921 if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
4922 *IsSwapped = true;
4923 return true;
4924 }
4925 return false;
4926 }
4927 case X86::CMP64ri32:
4928 case X86::CMP32ri:
4929 case X86::CMP16ri:
4930 case X86::CMP8ri:
4931 CASE_ND(SUB64ri32)
4932 CASE_ND(SUB32ri)
4933 CASE_ND(SUB16ri)
4934 CASE_ND(SUB8ri)
4935 case X86::TEST64rr:
4936 case X86::TEST32rr:
4937 case X86::TEST16rr:
4938 case X86::TEST8rr: {
4939 if (ImmMask != 0) {
4940 Register OISrcReg;
4941 Register OISrcReg2;
4942 int64_t OIMask;
4943 int64_t OIValue;
4944 if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
4945 SrcReg == OISrcReg && ImmMask == OIMask) {
4946 if (OIValue == ImmValue) {
4947 *ImmDelta = 0;
4948 return true;
4949 } else if (static_cast<uint64_t>(ImmValue) ==
4950 static_cast<uint64_t>(OIValue) - 1) {
4951 *ImmDelta = -1;
4952 return true;
4953 } else if (static_cast<uint64_t>(ImmValue) ==
4954 static_cast<uint64_t>(OIValue) + 1) {
4955 *ImmDelta = 1;
4956 return true;
4957 } else {
4958 return false;
4959 }
4960 }
4961 }
4962 return FlagI.isIdenticalTo(OI);
4963 }
4964 default:
4965 return false;
4966 }
4967}
4968
4969/// Check whether the definition can be converted
4970/// to remove a comparison against zero.
4971inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
4972 bool &ClearsOverflowFlag) {
4973 NoSignFlag = false;
4974 ClearsOverflowFlag = false;
4975
4976 // "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF, and
4977 // i386 GOTNTPOFF/INDNTPOFF relocations can convert an ADD to a LEA during
4978 // Initial Exec to Local Exec relaxation. In these cases, we must not depend
4979 // on the EFLAGS modification of ADD actually happening in the final binary.
4980 if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
4981 unsigned Flags = MI.getOperand(5).getTargetFlags();
4982 if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
4983 Flags == X86II::MO_GOTNTPOFF)
4984 return false;
4985 }
4986
4987 switch (MI.getOpcode()) {
4988 default:
4989 return false;
4990
4991 // The shift instructions only modify ZF if their shift count is non-zero.
4992 // N.B.: The processor truncates the shift count depending on the encoding.
4993 CASE_ND(SAR8ri)
4994 CASE_ND(SAR16ri)
4995 CASE_ND(SAR32ri)
4996 CASE_ND(SAR64ri)
4997 CASE_ND(SHR8ri)
4998 CASE_ND(SHR16ri)
4999 CASE_ND(SHR32ri)
5000 CASE_ND(SHR64ri)
5001 return getTruncatedShiftCount(MI, 2) != 0;
5002
5003 // Some left shift instructions can be turned into LEA instructions but only
5004 // if their flags aren't used. Avoid transforming such instructions.
5005 CASE_ND(SHL8ri)
5006 CASE_ND(SHL16ri)
5007 CASE_ND(SHL32ri)
5008 CASE_ND(SHL64ri) {
5009 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
5010 if (isTruncatedShiftCountForLEA(ShAmt))
5011 return false;
5012 return ShAmt != 0;
5013 }
5014
5015 CASE_ND(SHRD16rri8)
5016 CASE_ND(SHRD32rri8)
5017 CASE_ND(SHRD64rri8)
5018 CASE_ND(SHLD16rri8)
5019 CASE_ND(SHLD32rri8)
5020 CASE_ND(SHLD64rri8)
5021 return getTruncatedShiftCount(MI, 3) != 0;
5022
5023 CASE_ND(SUB64ri32)
5024 CASE_ND(SUB32ri)
5025 CASE_ND(SUB16ri)
5026 CASE_ND(SUB8ri)
5027 CASE_ND(SUB64rr)
5028 CASE_ND(SUB32rr)
5029 CASE_ND(SUB16rr)
5030 CASE_ND(SUB8rr)
5031 CASE_ND(SUB64rm)
5032 CASE_ND(SUB32rm)
5033 CASE_ND(SUB16rm)
5034 CASE_ND(SUB8rm)
5035 CASE_ND(DEC64r)
5036 CASE_ND(DEC32r)
5037 CASE_ND(DEC16r)
5038 CASE_ND(DEC8r)
5039 CASE_ND(ADD64ri32)
5040 CASE_ND(ADD32ri)
5041 CASE_ND(ADD16ri)
5042 CASE_ND(ADD8ri)
5043 CASE_ND(ADD64rr)
5044 CASE_ND(ADD32rr)
5045 CASE_ND(ADD16rr)
5046 CASE_ND(ADD8rr)
5047 CASE_ND(ADD64rm)
5048 CASE_ND(ADD32rm)
5049 CASE_ND(ADD16rm)
5050 CASE_ND(ADD8rm)
5051 CASE_ND(INC64r)
5052 CASE_ND(INC32r)
5053 CASE_ND(INC16r)
5054 CASE_ND(INC8r)
5055 CASE_ND(ADC64ri32)
5056 CASE_ND(ADC32ri)
5057 CASE_ND(ADC16ri)
5058 CASE_ND(ADC8ri)
5059 CASE_ND(ADC64rr)
5060 CASE_ND(ADC32rr)
5061 CASE_ND(ADC16rr)
5062 CASE_ND(ADC8rr)
5063 CASE_ND(ADC64rm)
5064 CASE_ND(ADC32rm)
5065 CASE_ND(ADC16rm)
5066 CASE_ND(ADC8rm)
5067 CASE_ND(SBB64ri32)
5068 CASE_ND(SBB32ri)
5069 CASE_ND(SBB16ri)
5070 CASE_ND(SBB8ri)
5071 CASE_ND(SBB64rr)
5072 CASE_ND(SBB32rr)
5073 CASE_ND(SBB16rr)
5074 CASE_ND(SBB8rr)
5075 CASE_ND(SBB64rm)
5076 CASE_ND(SBB32rm)
5077 CASE_ND(SBB16rm)
5078 CASE_ND(SBB8rm)
5079 CASE_ND(NEG8r)
5080 CASE_ND(NEG16r)
5081 CASE_ND(NEG32r)
5082 CASE_ND(NEG64r)
5083 case X86::LZCNT16rr:
5084 case X86::LZCNT16rm:
5085 case X86::LZCNT32rr:
5086 case X86::LZCNT32rm:
5087 case X86::LZCNT64rr:
5088 case X86::LZCNT64rm:
5089 case X86::POPCNT16rr:
5090 case X86::POPCNT16rm:
5091 case X86::POPCNT32rr:
5092 case X86::POPCNT32rm:
5093 case X86::POPCNT64rr:
5094 case X86::POPCNT64rm:
5095 case X86::TZCNT16rr:
5096 case X86::TZCNT16rm:
5097 case X86::TZCNT32rr:
5098 case X86::TZCNT32rm:
5099 case X86::TZCNT64rr:
5100 case X86::TZCNT64rm:
5101 return true;
5102 CASE_ND(AND64ri32)
5103 CASE_ND(AND32ri)
5104 CASE_ND(AND16ri)
5105 CASE_ND(AND8ri)
5106 CASE_ND(AND64rr)
5107 CASE_ND(AND32rr)
5108 CASE_ND(AND16rr)
5109 CASE_ND(AND8rr)
5110 CASE_ND(AND64rm)
5111 CASE_ND(AND32rm)
5112 CASE_ND(AND16rm)
5113 CASE_ND(AND8rm)
5114 CASE_ND(XOR64ri32)
5115 CASE_ND(XOR32ri)
5116 CASE_ND(XOR16ri)
5117 CASE_ND(XOR8ri)
5118 CASE_ND(XOR64rr)
5119 CASE_ND(XOR32rr)
5120 CASE_ND(XOR16rr)
5121 CASE_ND(XOR8rr)
5122 CASE_ND(XOR64rm)
5123 CASE_ND(XOR32rm)
5124 CASE_ND(XOR16rm)
5125 CASE_ND(XOR8rm)
5126 CASE_ND(OR64ri32)
5127 CASE_ND(OR32ri)
5128 CASE_ND(OR16ri)
5129 CASE_ND(OR8ri)
5130 CASE_ND(OR64rr)
5131 CASE_ND(OR32rr)
5132 CASE_ND(OR16rr)
5133 CASE_ND(OR8rr)
5134 CASE_ND(OR64rm)
5135 CASE_ND(OR32rm)
5136 CASE_ND(OR16rm)
5137 CASE_ND(OR8rm)
5138 case X86::ANDN32rr:
5139 case X86::ANDN32rm:
5140 case X86::ANDN64rr:
5141 case X86::ANDN64rm:
5142 case X86::BLSI32rr:
5143 case X86::BLSI32rm:
5144 case X86::BLSI64rr:
5145 case X86::BLSI64rm:
5146 case X86::BLSMSK32rr:
5147 case X86::BLSMSK32rm:
5148 case X86::BLSMSK64rr:
5149 case X86::BLSMSK64rm:
5150 case X86::BLSR32rr:
5151 case X86::BLSR32rm:
5152 case X86::BLSR64rr:
5153 case X86::BLSR64rm:
5154 case X86::BLCFILL32rr:
5155 case X86::BLCFILL32rm:
5156 case X86::BLCFILL64rr:
5157 case X86::BLCFILL64rm:
5158 case X86::BLCI32rr:
5159 case X86::BLCI32rm:
5160 case X86::BLCI64rr:
5161 case X86::BLCI64rm:
5162 case X86::BLCIC32rr:
5163 case X86::BLCIC32rm:
5164 case X86::BLCIC64rr:
5165 case X86::BLCIC64rm:
5166 case X86::BLCMSK32rr:
5167 case X86::BLCMSK32rm:
5168 case X86::BLCMSK64rr:
5169 case X86::BLCMSK64rm:
5170 case X86::BLCS32rr:
5171 case X86::BLCS32rm:
5172 case X86::BLCS64rr:
5173 case X86::BLCS64rm:
5174 case X86::BLSFILL32rr:
5175 case X86::BLSFILL32rm:
5176 case X86::BLSFILL64rr:
5177 case X86::BLSFILL64rm:
5178 case X86::BLSIC32rr:
5179 case X86::BLSIC32rm:
5180 case X86::BLSIC64rr:
5181 case X86::BLSIC64rm:
5182 case X86::BZHI32rr:
5183 case X86::BZHI32rm:
5184 case X86::BZHI64rr:
5185 case X86::BZHI64rm:
5186 case X86::T1MSKC32rr:
5187 case X86::T1MSKC32rm:
5188 case X86::T1MSKC64rr:
5189 case X86::T1MSKC64rm:
5190 case X86::TZMSK32rr:
5191 case X86::TZMSK32rm:
5192 case X86::TZMSK64rr:
5193 case X86::TZMSK64rm:
5194 // These instructions clear the overflow flag just like TEST.
5195 // FIXME: These are not the only instructions in this switch that clear the
5196 // overflow flag.
5197 ClearsOverflowFlag = true;
5198 return true;
5199 case X86::BEXTR32rr:
5200 case X86::BEXTR64rr:
5201 case X86::BEXTR32rm:
5202 case X86::BEXTR64rm:
5203 case X86::BEXTRI32ri:
5204 case X86::BEXTRI32mi:
5205 case X86::BEXTRI64ri:
5206 case X86::BEXTRI64mi:
5207 // BEXTR doesn't update the sign flag so we can't use it. It does clear
5208 // the overflow flag, but that's not useful without the sign flag.
5209 NoSignFlag = true;
5210 return true;
5211 }
5212}
5213
5214/// Check whether the use can be converted to remove a comparison against zero.
5215 static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
5216 switch (MI.getOpcode()) {
5217 default:
5218 return X86::COND_INVALID;
5219 CASE_ND(NEG8r)
5220 CASE_ND(NEG16r)
5221 CASE_ND(NEG32r)
5222 CASE_ND(NEG64r)
5223 return X86::COND_AE;
5224 case X86::LZCNT16rr:
5225 case X86::LZCNT32rr:
5226 case X86::LZCNT64rr:
5227 return X86::COND_B;
5228 case X86::POPCNT16rr:
5229 case X86::POPCNT32rr:
5230 case X86::POPCNT64rr:
5231 return X86::COND_E;
5232 case X86::TZCNT16rr:
5233 case X86::TZCNT32rr:
5234 case X86::TZCNT64rr:
5235 return X86::COND_B;
5236 case X86::BSF16rr:
5237 case X86::BSF32rr:
5238 case X86::BSF64rr:
5239 case X86::BSR16rr:
5240 case X86::BSR32rr:
5241 case X86::BSR64rr:
5242 return X86::COND_E;
5243 case X86::BLSI32rr:
5244 case X86::BLSI64rr:
5245 return X86::COND_AE;
5246 case X86::BLSR32rr:
5247 case X86::BLSR64rr:
5248 case X86::BLSMSK32rr:
5249 case X86::BLSMSK64rr:
5250 return X86::COND_B;
5251 // TODO: TBM instructions.
5252 }
5253}
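// Example of how the mapping above is used by optimizeCompareInstr: in
//   %0 = POPCNT32rr %x
//   TEST32rr %0, %0, implicit-def $eflags
//   JCC_1 %bb.zero, 4          ; COND_E
// POPCNT already sets ZF when its result is zero, so the TEST can be removed;
// COND_E users keep the returned condition and COND_NE users get its opposite
// via GetOppositeBranchCondition.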
5254
5255/// Check if there exists an earlier instruction that
5256/// operates on the same source operands and sets flags in the same way as
5257/// Compare; remove Compare if possible.
5258 bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
5259 Register SrcReg2, int64_t CmpMask,
5260 int64_t CmpValue,
5261 const MachineRegisterInfo *MRI) const {
5262 // Check whether we can replace SUB with CMP.
5263 switch (CmpInstr.getOpcode()) {
5264 default:
5265 break;
5266 CASE_ND(SUB64ri32)
5267 CASE_ND(SUB32ri)
5268 CASE_ND(SUB16ri)
5269 CASE_ND(SUB8ri)
5270 CASE_ND(SUB64rm)
5271 CASE_ND(SUB32rm)
5272 CASE_ND(SUB16rm)
5273 CASE_ND(SUB8rm)
5274 CASE_ND(SUB64rr)
5275 CASE_ND(SUB32rr)
5276 CASE_ND(SUB16rr)
5277 CASE_ND(SUB8rr) {
5278 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
5279 return false;
5280 // There is no use of the destination register, so we can replace SUB with CMP.
5281 unsigned NewOpcode = 0;
5282#define FROM_TO(A, B) \
5283 CASE_ND(A) NewOpcode = X86::B; \
5284 break;
5285 switch (CmpInstr.getOpcode()) {
5286 default:
5287 llvm_unreachable("Unreachable!");
5288 FROM_TO(SUB64rm, CMP64rm)
5289 FROM_TO(SUB32rm, CMP32rm)
5290 FROM_TO(SUB16rm, CMP16rm)
5291 FROM_TO(SUB8rm, CMP8rm)
5292 FROM_TO(SUB64rr, CMP64rr)
5293 FROM_TO(SUB32rr, CMP32rr)
5294 FROM_TO(SUB16rr, CMP16rr)
5295 FROM_TO(SUB8rr, CMP8rr)
5296 FROM_TO(SUB64ri32, CMP64ri32)
5297 FROM_TO(SUB32ri, CMP32ri)
5298 FROM_TO(SUB16ri, CMP16ri)
5299 FROM_TO(SUB8ri, CMP8ri)
5300 }
5301#undef FROM_TO
5302 CmpInstr.setDesc(get(NewOpcode));
5303 CmpInstr.removeOperand(0);
5304 // Mutating this instruction invalidates any debug data associated with it.
5305 CmpInstr.dropDebugNumber();
5306 // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
5307 if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
5308 NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
5309 return false;
5310 }
5311 }
5312
5313 // The following code tries to remove the comparison by re-using EFLAGS
5314 // from earlier instructions.
5315
5316 bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
5317
5318 // Transformation currently requires SSA values.
5319 if (SrcReg2.isPhysical())
5320 return false;
5321 MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
5322 assert(SrcRegDef && "Must have a definition (SSA)");
5323
5324 MachineInstr *MI = nullptr;
5325 MachineInstr *Sub = nullptr;
5326 MachineInstr *Movr0Inst = nullptr;
5327 bool NoSignFlag = false;
5328 bool ClearsOverflowFlag = false;
5329 bool ShouldUpdateCC = false;
5330 bool IsSwapped = false;
5331 X86::CondCode NewCC = X86::COND_INVALID;
5332 int64_t ImmDelta = 0;
5333
5334 // Search backward from CmpInstr for the next instruction defining EFLAGS.
5335 const TargetRegisterInfo *TRI = &getRegisterInfo();
5336 MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
5337 MachineBasicBlock::reverse_iterator From =
5338 std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
5339 for (MachineBasicBlock *MBB = &CmpMBB;;) {
5340 for (MachineInstr &Inst : make_range(From, MBB->rend())) {
5341 // Try to use EFLAGS from the instruction defining %SrcReg. Example:
5342 // %eax = addl ...
5343 // ... // EFLAGS not changed
5344 // testl %eax, %eax // <-- can be removed
5345 if (&Inst == SrcRegDef) {
5346 if (IsCmpZero &&
5347 isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
5348 MI = &Inst;
5349 break;
5350 }
5351
5352 // Look back for the following pattern, in which case the
5353 // test16rr/test64rr instruction could be erased.
5354 //
5355 // Example for test16rr:
5356 // %reg = and32ri %in_reg, 5
5357 // ... // EFLAGS not changed.
5358 // %src_reg = copy %reg.sub_16bit:gr32
5359 // test16rr %src_reg, %src_reg, implicit-def $eflags
5360 // Example for test64rr:
5361 // %reg = and32ri %in_reg, 5
5362 // ... // EFLAGS not changed.
5363 // %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
5364 // test64rr %src_reg, %src_reg, implicit-def $eflags
5365 MachineInstr *AndInstr = nullptr;
5366 if (IsCmpZero &&
5367 findRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI,
5368 NoSignFlag, ClearsOverflowFlag)) {
5369 assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
5370 MI = AndInstr;
5371 break;
5372 }
5373 // Cannot find other candidates before definition of SrcReg.
5374 return false;
5375 }
5376
5377 if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
5378 // Try to use EFLAGS produced by an instruction reading %SrcReg.
5379 // Example:
5380 // %eax = ...
5381 // ...
5382 // popcntl %eax
5383 // ... // EFLAGS not changed
5384 // testl %eax, %eax // <-- can be removed
5385 if (IsCmpZero) {
5386 NewCC = isUseDefConvertible(Inst);
5387 if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
5388 Inst.getOperand(1).getReg() == SrcReg) {
5389 ShouldUpdateCC = true;
5390 MI = &Inst;
5391 break;
5392 }
5393 }
5394
5395 // Try to use EFLAGS from an instruction with similar flag results.
5396 // Example:
5397 // sub x, y or cmp x, y
5398 // ... // EFLAGS not changed
5399 // cmp x, y // <-- can be removed
5400 if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
5401 Inst, &IsSwapped, &ImmDelta)) {
5402 Sub = &Inst;
5403 break;
5404 }
5405
5406 // MOV32r0 is implemented with xor which clobbers condition code. It is
5407 // safe to move up, if the definition to EFLAGS is dead and earlier
5408 // instructions do not read or write EFLAGS.
5409 if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
5410 Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
5411 Movr0Inst = &Inst;
5412 continue;
5413 }
5414
5415 // Cannot do anything for any other EFLAG changes.
5416 return false;
5417 }
5418 }
5419
5420 if (MI || Sub)
5421 break;
5422
5423 // Reached begin of basic block. Continue in predecessor if there is
5424 // exactly one.
5425 if (MBB->pred_size() != 1)
5426 return false;
5427 MBB = *MBB->pred_begin();
5428 From = MBB->rbegin();
5429 }
5430
5431 // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
5432 // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
5433 // If we are done with the basic block, we need to check whether EFLAGS is
5434 // live-out.
5435 bool FlagsMayLiveOut = true;
5436 SmallVector<std::pair<MachineInstr *, X86::CondCode>, 4> OpsToUpdate;
5437 MachineBasicBlock::iterator AfterCmpInstr =
5438 std::next(MachineBasicBlock::iterator(CmpInstr));
5439 for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
5440 bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
5441 bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
5442 // We should check the usage if this instruction uses and updates EFLAGS.
5443 if (!UseEFLAGS && ModifyEFLAGS) {
5444 // It is safe to remove CmpInstr if EFLAGS is updated again.
5445 FlagsMayLiveOut = false;
5446 break;
5447 }
5448 if (!UseEFLAGS && !ModifyEFLAGS)
5449 continue;
5450
5451 // EFLAGS is used by this instruction.
5452 X86::CondCode OldCC = X86::getCondFromMI(Instr);
5453 if ((MI || IsSwapped || ImmDelta != 0) && OldCC == X86::COND_INVALID)
5454 return false;
5455
5456 X86::CondCode ReplacementCC = X86::COND_INVALID;
5457 if (MI) {
5458 switch (OldCC) {
5459 default:
5460 break;
5461 case X86::COND_A:
5462 case X86::COND_AE:
5463 case X86::COND_B:
5464 case X86::COND_BE:
5465 // CF is used, we can't perform this optimization.
5466 return false;
5467 case X86::COND_G:
5468 case X86::COND_GE:
5469 case X86::COND_L:
5470 case X86::COND_LE:
5471 // If SF is used, but the instruction doesn't update the SF, then we
5472 // can't do the optimization.
5473 if (NoSignFlag)
5474 return false;
5475 [[fallthrough]];
5476 case X86::COND_O:
5477 case X86::COND_NO:
5478 // If OF is used, the instruction needs to clear it like CmpZero does.
5479 if (!ClearsOverflowFlag)
5480 return false;
5481 break;
5482 case X86::COND_S:
5483 case X86::COND_NS:
5484 // If SF is used, but the instruction doesn't update the SF, then we
5485 // can't do the optimization.
5486 if (NoSignFlag)
5487 return false;
5488 break;
5489 }
5490
5491 // If we're updating the condition code check if we have to reverse the
5492 // condition.
5493 if (ShouldUpdateCC)
5494 switch (OldCC) {
5495 default:
5496 return false;
5497 case X86::COND_E:
5498 ReplacementCC = NewCC;
5499 break;
5500 case X86::COND_NE:
5501 ReplacementCC = GetOppositeBranchCondition(NewCC);
5502 break;
5503 }
5504 } else if (IsSwapped) {
5505 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
5506 // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
5507 // We swap the condition code and synthesize the new opcode.
5508 ReplacementCC = getSwappedCondition(OldCC);
5509 if (ReplacementCC == X86::COND_INVALID)
5510 return false;
5511 ShouldUpdateCC = true;
5512 } else if (ImmDelta != 0) {
5513 unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
5514 // Shift amount for min/max constants to adjust for 8/16/32 instruction
5515 // sizes.
5516 switch (OldCC) {
5517 case X86::COND_L: // x <s (C + 1) --> x <=s C
5518 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
5519 return false;
5520 ReplacementCC = X86::COND_LE;
5521 break;
5522 case X86::COND_B: // x <u (C + 1) --> x <=u C
5523 if (ImmDelta != 1 || CmpValue == 0)
5524 return false;
5525 ReplacementCC = X86::COND_BE;
5526 break;
5527 case X86::COND_GE: // x >=s (C + 1) --> x >s C
5528 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
5529 return false;
5530 ReplacementCC = X86::COND_G;
5531 break;
5532 case X86::COND_AE: // x >=u (C + 1) --> x >u C
5533 if (ImmDelta != 1 || CmpValue == 0)
5534 return false;
5535 ReplacementCC = X86::COND_A;
5536 break;
5537 case X86::COND_G: // x >s (C - 1) --> x >=s C
5538 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
5539 return false;
5540 ReplacementCC = X86::COND_GE;
5541 break;
5542 case X86::COND_A: // x >u (C - 1) --> x >=u C
5543 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
5544 return false;
5545 ReplacementCC = X86::COND_AE;
5546 break;
5547 case X86::COND_LE: // x <=s (C - 1) --> x <s C
5548 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
5549 return false;
5550 ReplacementCC = X86::COND_L;
5551 break;
5552 case X86::COND_BE: // x <=u (C - 1) --> x <u C
5553 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
5554 return false;
5555 ReplacementCC = X86::COND_B;
5556 break;
5557 default:
5558 return false;
5559 }
5560 ShouldUpdateCC = true;
5561 }
5562
5563 if (ShouldUpdateCC && ReplacementCC != OldCC) {
5564 // Push the MachineInstr to OpsToUpdate.
5565 // If it is safe to remove CmpInstr, the condition code of these
5566 // instructions will be modified.
5567 OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
5568 }
5569 if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
5570 // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
5571 FlagsMayLiveOut = false;
5572 break;
5573 }
5574 }
5575
5576 // If we have to update users but EFLAGS is live-out, abort, since we cannot
5577 // easily find all of the users.
5578 if ((MI != nullptr || ShouldUpdateCC) && FlagsMayLiveOut) {
5579 for (MachineBasicBlock *Successor : CmpMBB.successors())
5580 if (Successor->isLiveIn(X86::EFLAGS))
5581 return false;
5582 }
5583
5584 // The instruction to be updated is either Sub or MI.
5585 assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
5586 Sub = MI != nullptr ? MI : Sub;
5587 MachineBasicBlock *SubBB = Sub->getParent();
5588 // Move Movr0Inst to the appropriate place before Sub.
5589 if (Movr0Inst) {
5590 // Only move within the same block so we don't accidentally move to a
5591 // block with higher execution frequency.
5592 if (&CmpMBB != SubBB)
5593 return false;
5594 // Look backwards until we find a def that doesn't use the current EFLAGS.
5595 MachineBasicBlock::reverse_iterator InsertI = Sub->getReverseIterator(),
5596                                     InsertE = Sub->getParent()->rend();
5597 for (; InsertI != InsertE; ++InsertI) {
5598 MachineInstr *Instr = &*InsertI;
5599 if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
5600 Instr->modifiesRegister(X86::EFLAGS, TRI)) {
5601 Movr0Inst->getParent()->remove(Movr0Inst);
5602 Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
5603 Movr0Inst);
5604 break;
5605 }
5606 }
5607 if (InsertI == InsertE)
5608 return false;
5609 }
5610
5611 // Make sure Sub instruction defines EFLAGS and mark the def live.
5612 MachineOperand *FlagDef =
5613 Sub->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
5614 assert(FlagDef && "Unable to locate a def EFLAGS operand");
5615 FlagDef->setIsDead(false);
5616
5617 CmpInstr.eraseFromParent();
5618
5619 // Modify the condition code of instructions in OpsToUpdate.
5620 for (auto &Op : OpsToUpdate) {
5621 Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
5622 .setImm(Op.second);
5623 }
5624 // Add EFLAGS to the live-ins of blocks between CmpMBB and the flags producer's block.
5625 for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
5626 MBB = *MBB->pred_begin()) {
5627 assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
5628 if (!MBB->isLiveIn(X86::EFLAGS))
5629 MBB->addLiveIn(X86::EFLAGS);
5630 }
5631 return true;
5632}
5633
5634 /// Try to remove the load by folding it into a register
5635 /// operand at the use. We fold the load if it defines a virtual
5636 /// register, the virtual register is used exactly once in the same BB, and the
5637 /// instructions in between do not load, store, or have side effects.
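/// For example (illustrative): a MOV32rm whose only use is an ADD32rr later in
/// the same block can be folded so the add reads memory directly (ADD32rm).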
5638 MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
5639                                               const MachineRegisterInfo *MRI,
5640 Register &FoldAsLoadDefReg,
5641 MachineInstr *&DefMI) const {
5642 // Check whether we can move DefMI here.
5643 DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
5644 assert(DefMI);
5645 bool SawStore = false;
5646 if (!DefMI->isSafeToMove(SawStore))
5647 return nullptr;
5648
5649 // Collect information about virtual register operands of MI.
5650 SmallVector<unsigned, 1> SrcOperandIds;
5651 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
5652 MachineOperand &MO = MI.getOperand(i);
5653 if (!MO.isReg())
5654 continue;
5655 Register Reg = MO.getReg();
5656 if (Reg != FoldAsLoadDefReg)
5657 continue;
5658 // Do not fold if we have a subreg use or a def.
5659 if (MO.getSubReg() || MO.isDef())
5660 return nullptr;
5661 SrcOperandIds.push_back(i);
5662 }
5663 if (SrcOperandIds.empty())
5664 return nullptr;
5665
5666 // Check whether we can fold the def into SrcOperandId.
5667 if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
5668 FoldAsLoadDefReg = 0;
5669 return FoldMI;
5670 }
5671
5672 return nullptr;
5673}
5674
5675/// \returns true if the instruction can be changed to COPY when imm is 0.
5676static bool canConvert2Copy(unsigned Opc) {
5677 switch (Opc) {
5678 default:
5679 return false;
5680 CASE_ND(ADD64ri32)
5681 CASE_ND(SUB64ri32)
5682 CASE_ND(OR64ri32)
5683 CASE_ND(XOR64ri32)
5684 CASE_ND(ADD32ri)
5685 CASE_ND(SUB32ri)
5686 CASE_ND(OR32ri)
5687 CASE_ND(XOR32ri)
5688 return true;
5689 }
5690}
5691
5692 /// Convert an ALUrr opcode to the corresponding ALUri opcode, e.g.
5693 /// ADD32rr ==> ADD32ri
5694static unsigned convertALUrr2ALUri(unsigned Opc) {
5695 switch (Opc) {
5696 default:
5697 return 0;
5698#define FROM_TO(FROM, TO) \
5699 case X86::FROM: \
5700 return X86::TO; \
5701 case X86::FROM##_ND: \
5702 return X86::TO##_ND;
5703 FROM_TO(ADD64rr, ADD64ri32)
5704 FROM_TO(ADC64rr, ADC64ri32)
5705 FROM_TO(SUB64rr, SUB64ri32)
5706 FROM_TO(SBB64rr, SBB64ri32)
5707 FROM_TO(AND64rr, AND64ri32)
5708 FROM_TO(OR64rr, OR64ri32)
5709 FROM_TO(XOR64rr, XOR64ri32)
5710 FROM_TO(SHR64rCL, SHR64ri)
5711 FROM_TO(SHL64rCL, SHL64ri)
5712 FROM_TO(SAR64rCL, SAR64ri)
5713 FROM_TO(ROL64rCL, ROL64ri)
5714 FROM_TO(ROR64rCL, ROR64ri)
5715 FROM_TO(RCL64rCL, RCL64ri)
5716 FROM_TO(RCR64rCL, RCR64ri)
5717 FROM_TO(ADD32rr, ADD32ri)
5718 FROM_TO(ADC32rr, ADC32ri)
5719 FROM_TO(SUB32rr, SUB32ri)
5720 FROM_TO(SBB32rr, SBB32ri)
5721 FROM_TO(AND32rr, AND32ri)
5722 FROM_TO(OR32rr, OR32ri)
5723 FROM_TO(XOR32rr, XOR32ri)
5724 FROM_TO(SHR32rCL, SHR32ri)
5725 FROM_TO(SHL32rCL, SHL32ri)
5726 FROM_TO(SAR32rCL, SAR32ri)
5727 FROM_TO(ROL32rCL, ROL32ri)
5728 FROM_TO(ROR32rCL, ROR32ri)
5729 FROM_TO(RCL32rCL, RCL32ri)
5730 FROM_TO(RCR32rCL, RCR32ri)
5731#undef FROM_TO
5732#define FROM_TO(FROM, TO) \
5733 case X86::FROM: \
5734 return X86::TO;
5735 FROM_TO(TEST64rr, TEST64ri32)
5736 FROM_TO(CTEST64rr, CTEST64ri32)
5737 FROM_TO(CMP64rr, CMP64ri32)
5738 FROM_TO(CCMP64rr, CCMP64ri32)
5739 FROM_TO(TEST32rr, TEST32ri)
5740 FROM_TO(CTEST32rr, CTEST32ri)
5741 FROM_TO(CMP32rr, CMP32ri)
5742 FROM_TO(CCMP32rr, CCMP32ri)
5743#undef FROM_TO
5744 }
5745}
5746
5747/// Reg is assigned ImmVal in DefMI, and is used in UseMI.
5748/// If MakeChange is true, this function tries to replace Reg by ImmVal in
5749/// UseMI. If MakeChange is false, just check if folding is possible.
5750 ///
5751/// \returns true if folding is successful or possible.
5752bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI,
5753 Register Reg, int64_t ImmVal,
5754                                      MachineRegisterInfo *MRI,
5755                                      bool MakeChange) const {
5756 bool Modified = false;
5757
5758 // 64 bit operations accept sign extended 32 bit immediates.
5759 // 32 bit operations accept all 32 bit immediates, so we don't need to check
5760 // them.
5761 const TargetRegisterClass *RC = nullptr;
5762 if (Reg.isVirtual())
5763 RC = MRI->getRegClass(Reg);
5764 if ((Reg.isPhysical() && X86::GR64RegClass.contains(Reg)) ||
5765 (Reg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC))) {
5766 if (!isInt<32>(ImmVal))
5767 return false;
5768 }
5769
5770 if (UseMI.findRegisterUseOperand(Reg, /*TRI=*/nullptr)->getSubReg())
5771 return false;
5772 // An immediate has a larger code size than a register, so avoid folding the
5773 // immediate if it has more than one use and we are optimizing for size.
5774 if (UseMI.getMF()->getFunction().hasOptSize() && Reg.isVirtual() &&
5775 !MRI->hasOneNonDBGUse(Reg))
5776 return false;
5777
5778 unsigned Opc = UseMI.getOpcode();
5779 unsigned NewOpc;
5780 if (Opc == TargetOpcode::COPY) {
5781 Register ToReg = UseMI.getOperand(0).getReg();
5782 const TargetRegisterClass *RC = nullptr;
5783 if (ToReg.isVirtual())
5784 RC = MRI->getRegClass(ToReg);
5785 bool GR32Reg = (ToReg.isVirtual() && X86::GR32RegClass.hasSubClassEq(RC)) ||
5786 (ToReg.isPhysical() && X86::GR32RegClass.contains(ToReg));
5787 bool GR64Reg = (ToReg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC)) ||
5788 (ToReg.isPhysical() && X86::GR64RegClass.contains(ToReg));
5789 bool GR8Reg = (ToReg.isVirtual() && X86::GR8RegClass.hasSubClassEq(RC)) ||
5790 (ToReg.isPhysical() && X86::GR8RegClass.contains(ToReg));
5791
5792 if (ImmVal == 0) {
5793 // We have MOV32r0 only.
5794 if (!GR32Reg)
5795 return false;
5796 }
5797
5798 if (GR64Reg) {
5799 if (isUInt<32>(ImmVal))
5800 NewOpc = X86::MOV32ri64;
5801 else
5802 NewOpc = X86::MOV64ri;
5803 } else if (GR32Reg) {
5804 NewOpc = X86::MOV32ri;
5805 if (ImmVal == 0) {
5806 // MOV32r0 clobbers EFLAGS.
5807       const TargetRegisterInfo *TRI = &getRegisterInfo();
5808       if (UseMI.getParent()->computeRegisterLiveness(
5809 TRI, X86::EFLAGS, UseMI) != MachineBasicBlock::LQR_Dead)
5810 return false;
5811
5812       // MOV32r0 is different from the other cases because it doesn't encode the
5813       // immediate in the instruction, so we modify it directly here.
5814 if (!MakeChange)
5815 return true;
5816 UseMI.setDesc(get(X86::MOV32r0));
5817 UseMI.removeOperand(
5818 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr));
5819 UseMI.addOperand(MachineOperand::CreateReg(X86::EFLAGS, /*isDef=*/true,
5820 /*isImp=*/true,
5821 /*isKill=*/false,
5822 /*isDead=*/true));
5823 Modified = true;
5824 }
5825 } else if (GR8Reg)
5826 NewOpc = X86::MOV8ri;
5827 else
5828 return false;
5829 } else
5830 NewOpc = convertALUrr2ALUri(Opc);
5831
5832 if (!NewOpc)
5833 return false;
5834
5835 // For SUB instructions the immediate can only be the second source operand.
5836 if ((NewOpc == X86::SUB64ri32 || NewOpc == X86::SUB32ri ||
5837 NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri ||
5838 NewOpc == X86::SUB64ri32_ND || NewOpc == X86::SUB32ri_ND ||
5839 NewOpc == X86::SBB64ri32_ND || NewOpc == X86::SBB32ri_ND) &&
5840 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != 2)
5841 return false;
5842 // For CMP instructions the immediate can only be at index 1.
5843 if (((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) ||
5844 (NewOpc == X86::CCMP64ri32 || NewOpc == X86::CCMP32ri)) &&
5845 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != 1)
5846 return false;
5847
5848 using namespace X86;
5849 if (isSHL(Opc) || isSHR(Opc) || isSAR(Opc) || isROL(Opc) || isROR(Opc) ||
5850 isRCL(Opc) || isRCR(Opc)) {
5851 unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
5852 if (RegIdx < 2)
5853 return false;
5854 if (!isInt<8>(ImmVal))
5855 return false;
5856 assert(Reg == X86::CL);
5857
5858 if (!MakeChange)
5859 return true;
5860 UseMI.setDesc(get(NewOpc));
5861 UseMI.removeOperand(RegIdx);
5862 UseMI.addOperand(MachineOperand::CreateImm(ImmVal));
5863     // Reg is the physical register $cl, so we cannot tell through MRI whether
5864     // DefMI is dead. Let the caller handle it, or let the dead-mi-elimination
5865     // pass delete the dead physical-register-defining instruction.
5866 return true;
5867 }
5868
5869 if (!MakeChange)
5870 return true;
5871
5872 if (!Modified) {
5873 // Modify the instruction.
5874 if (ImmVal == 0 && canConvert2Copy(NewOpc) &&
5875 UseMI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) {
5876 // %100 = add %101, 0
5877 // ==>
5878 // %100 = COPY %101
5879 UseMI.setDesc(get(TargetOpcode::COPY));
5880 UseMI.removeOperand(
5881 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr));
5882 UseMI.removeOperand(
5883 UseMI.findRegisterDefOperandIdx(X86::EFLAGS, /*TRI=*/nullptr));
5884 UseMI.untieRegOperand(0);
5887 } else {
5888 unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
5889 unsigned ImmOpNum = 2;
5890 if (!UseMI.getOperand(0).isDef()) {
5891 Op1 = 0; // TEST, CMP, CTEST, CCMP
5892 ImmOpNum = 1;
5893 }
5894 if (Opc == TargetOpcode::COPY)
5895 ImmOpNum = 1;
5896 if (findCommutedOpIndices(UseMI, Op1, Op2) &&
5897 UseMI.getOperand(Op1).getReg() == Reg)
5898 commuteInstruction(UseMI);
5899
5900 assert(UseMI.getOperand(ImmOpNum).getReg() == Reg);
5901 UseMI.setDesc(get(NewOpc));
5902 UseMI.getOperand(ImmOpNum).ChangeToImmediate(ImmVal);
5903 }
5904 }
5905
5906 if (Reg.isVirtual() && MRI->use_nodbg_empty(Reg))
5907     DefMI->eraseFromParent();
5908
5909 return true;
5910}
5911
5912/// foldImmediate - 'Reg' is known to be defined by a move immediate
5913/// instruction, try to fold the immediate into the use instruction.
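/// For example (illustrative): given '%1 = MOV32ri 7', a use
/// '%2 = ADD32rr %0, %1' can be rewritten to '%2 = ADD32ri %0, 7'.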
5914 bool X86InstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
5915                                  Register Reg, MachineRegisterInfo *MRI) const {
5916 int64_t ImmVal;
5917 if (!getConstValDefinedInReg(DefMI, Reg, ImmVal))
5918 return false;
5919
5920 return foldImmediateImpl(UseMI, &DefMI, Reg, ImmVal, MRI, true);
5921}
5922
5923/// Expand a single-def pseudo instruction to a two-addr
5924/// instruction with two undef reads of the register being defined.
5925/// This is used for mapping:
5926/// %xmm4 = V_SET0
5927/// to:
5928/// %xmm4 = PXORrr undef %xmm4, undef %xmm4
5929///
5930 static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
5931                              const MCInstrDesc &Desc) {
5932 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
5933 Register Reg = MIB.getReg(0);
5934 MIB->setDesc(Desc);
5935
5936 // MachineInstr::addOperand() will insert explicit operands before any
5937 // implicit operands.
5938   MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
5939   // But we don't trust that.
5940 assert(MIB.getReg(1) == Reg && MIB.getReg(2) == Reg && "Misplaced operand");
5941 return true;
5942}
5943
5944/// Expand a single-def pseudo instruction to a two-addr
5945/// instruction with two %k0 reads.
5946/// This is used for mapping:
5947/// %k4 = K_SET1
5948/// to:
5949/// %k4 = KXNORrr %k0, %k0
5950 static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
5951                             Register Reg) {
5952 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
5953 MIB->setDesc(Desc);
5954   MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
5955   return true;
5956}
5957
5958 static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
5959                           bool MinusOne) {
5960 MachineBasicBlock &MBB = *MIB->getParent();
5961 const DebugLoc &DL = MIB->getDebugLoc();
5962 Register Reg = MIB.getReg(0);
5963
5964 // Insert the XOR.
5965 BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
5966 .addReg(Reg, RegState::Undef)
5967 .addReg(Reg, RegState::Undef);
5968
5969 // Turn the pseudo into an INC or DEC.
5970 MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
5971 MIB.addReg(Reg);
5972
5973 return true;
5974}
5975
5976 static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
5977                                const TargetInstrInfo &TII,
5978 const X86Subtarget &Subtarget) {
5979 MachineBasicBlock &MBB = *MIB->getParent();
5980 const DebugLoc &DL = MIB->getDebugLoc();
5981 int64_t Imm = MIB->getOperand(1).getImm();
5982 assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
5983   MachineBasicBlock::iterator I = MIB.getInstr();
5984
5985 int StackAdjustment;
5986
5987 if (Subtarget.is64Bit()) {
5988 assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
5989 MIB->getOpcode() == X86::MOV32ImmSExti8);
5990
5991 // Can't use push/pop lowering if the function might write to the red zone.
5992 X86MachineFunctionInfo *X86FI =
5993         MBB.getParent()->getInfo<X86MachineFunctionInfo>();
5994     if (X86FI->getUsesRedZone()) {
5995 MIB->setDesc(TII.get(MIB->getOpcode() == X86::MOV32ImmSExti8
5996 ? X86::MOV32ri
5997 : X86::MOV64ri));
5998 return true;
5999 }
6000
6001 // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
6002 // widen the register if necessary.
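    // Illustrative: '$eax = MOV32ImmSExti8 -1' becomes 'PUSH64i32 -1' followed
    // by '$rax = POP64r' (the 32-bit destination is widened to its 64-bit
    // super-register).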
6003 StackAdjustment = 8;
6004 BuildMI(MBB, I, DL, TII.get(X86::PUSH64i32)).addImm(Imm);
6005 MIB->setDesc(TII.get(X86::POP64r));
6006 MIB->getOperand(0).setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
6007 } else {
6008 assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
6009 StackAdjustment = 4;
6010 BuildMI(MBB, I, DL, TII.get(X86::PUSH32i)).addImm(Imm);
6011 MIB->setDesc(TII.get(X86::POP32r));
6012 }
6013 MIB->removeOperand(1);
6014   MIB->addImplicitDefUseOperands(*MBB.getParent());
6015
6016 // Build CFI if necessary.
6017 MachineFunction &MF = *MBB.getParent();
6018 const X86FrameLowering *TFL = Subtarget.getFrameLowering();
6019 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
6020 bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
6021 bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
6022 if (EmitCFI) {
6023 TFL->BuildCFI(
6024 MBB, I, DL,
6025 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
6026 TFL->BuildCFI(
6027 MBB, std::next(I), DL,
6028 MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
6029 }
6030
6031 return true;
6032}
6033
6034// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
6035// code sequence is needed for other targets.
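// The expansion below (illustrative) performs two loads: a RIP-relative
// MOV64rm that fetches the guard's address from the GOT, followed by a second
// MOV64rm that loads the guard value from that address.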
6036 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
6037                                  const TargetInstrInfo &TII) {
6038 MachineBasicBlock &MBB = *MIB->getParent();
6039 const DebugLoc &DL = MIB->getDebugLoc();
6040 Register Reg = MIB.getReg(0);
6041 const GlobalValue *GV =
6042 cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
6043   auto Flags = MachineMemOperand::MOLoad |
6044                MachineMemOperand::MODereferenceable |
6045                MachineMemOperand::MOInvariant;
6046   MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
6047       MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, Align(8));
6048   MachineBasicBlock::iterator I = MIB.getInstr();
6049
6050 BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg)
6051 .addReg(X86::RIP)
6052 .addImm(1)
6053 .addReg(0)
6055 .addReg(0)
6056 .addMemOperand(MMO);
6057 MIB->setDebugLoc(DL);
6058 MIB->setDesc(TII.get(X86::MOV64rm));
6059 MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
6060}
6061
6062 static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
6063   MachineBasicBlock &MBB = *MIB->getParent();
6064 MachineFunction &MF = *MBB.getParent();
6065 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
6066 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
6067 unsigned XorOp =
6068 MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
6069 MIB->setDesc(TII.get(XorOp));
6070 MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef);
6071 return true;
6072}
6073
6074// This is used to handle spills for 128/256-bit registers when we have AVX512,
6075 // but not VLX. If it uses an extended register, we need to use an instruction
6076 // that loads the lower 128/256 bits but is available with only AVX512F.
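// Illustrative: VMOVAPSZ128rm_NOVLX into %xmm16 becomes a VBROADCASTF32X4rm
// writing %zmm16; for %xmm0-%xmm15 a plain VEX-encoded VMOVAPSrm is used.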
6077 static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
6078                             const TargetRegisterInfo *TRI,
6079 const MCInstrDesc &LoadDesc,
6080 const MCInstrDesc &BroadcastDesc, unsigned SubIdx) {
6081 Register DestReg = MIB.getReg(0);
6082 // Check if DestReg is XMM16-31 or YMM16-31.
6083 if (TRI->getEncodingValue(DestReg) < 16) {
6084 // We can use a normal VEX encoded load.
6085 MIB->setDesc(LoadDesc);
6086 } else {
6087 // Use a 128/256-bit VBROADCAST instruction.
6088 MIB->setDesc(BroadcastDesc);
6089 // Change the destination to a 512-bit register.
6090 DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
6091 MIB->getOperand(0).setReg(DestReg);
6092 }
6093 return true;
6094}
6095
6096// This is used to handle spills for 128/256-bit registers when we have AVX512,
6097 // but not VLX. If it uses an extended register, we need to use an instruction
6098 // that stores the lower 128/256 bits but is available with only AVX512F.
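// Illustrative: VMOVAPSZ128mr_NOVLX of %xmm16 becomes a VEXTRACTF32x4Zmr that
// stores the low 128 bits of %zmm16; for %xmm0-%xmm15 a plain VMOVAPSmr is used.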
6099 static bool expandNOVLXStore(MachineInstrBuilder &MIB,
6100                              const TargetRegisterInfo *TRI,
6101 const MCInstrDesc &StoreDesc,
6102 const MCInstrDesc &ExtractDesc, unsigned SubIdx) {
6103 Register SrcReg = MIB.getReg(X86::AddrNumOperands);
6104   // Check if SrcReg is XMM16-31 or YMM16-31.
6105 if (TRI->getEncodingValue(SrcReg) < 16) {
6106 // We can use a normal VEX encoded store.
6107 MIB->setDesc(StoreDesc);
6108 } else {
6109 // Use a VEXTRACTF instruction.
6110 MIB->setDesc(ExtractDesc);
6111     // Change the source to a 512-bit register.
6112 SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
6113     MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
6114     MIB.addImm(0x0); // Append immediate to extract from the lower bits.
6115 }
6116
6117 return true;
6118}
6119
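// Expand a SHXDROT pseudo into a double shift with both sources equal, e.g.
// (illustrative) SHLDROT32ri %reg, imm ==> SHLD32rri8 %reg, %reg, imm.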
6120 static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
6121   MIB->setDesc(Desc);
6122 int64_t ShiftAmt = MIB->getOperand(2).getImm();
6123 // Temporarily remove the immediate so we can add another source register.
6124 MIB->removeOperand(2);
6125 // Add the register. Don't copy the kill flag if there is one.
6126 MIB.addReg(MIB.getReg(1), getUndefRegState(MIB->getOperand(1).isUndef()));
6127 // Add back the immediate.
6128 MIB.addImm(ShiftAmt);
6129 return true;
6130}
6131
6132 bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
6133   bool HasAVX = Subtarget.hasAVX();
6134 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
6135 switch (MI.getOpcode()) {
6136 case X86::MOV32r0:
6137 return Expand2AddrUndef(MIB, get(X86::XOR32rr));
6138 case X86::MOV32r1:
6139 return expandMOV32r1(MIB, *this, /*MinusOne=*/false);
6140 case X86::MOV32r_1:
6141 return expandMOV32r1(MIB, *this, /*MinusOne=*/true);
6142 case X86::MOV32ImmSExti8:
6143 case X86::MOV64ImmSExti8:
6144 return ExpandMOVImmSExti8(MIB, *this, Subtarget);
6145 case X86::SETB_C32r:
6146 return Expand2AddrUndef(MIB, get(X86::SBB32rr));
6147 case X86::SETB_C64r:
6148 return Expand2AddrUndef(MIB, get(X86::SBB64rr));
6149 case X86::MMX_SET0:
6150 return Expand2AddrUndef(MIB, get(X86::MMX_PXORrr));
6151 case X86::V_SET0:
6152 case X86::FsFLD0SS:
6153 case X86::FsFLD0SD:
6154 case X86::FsFLD0SH:
6155 case X86::FsFLD0F128:
6156 return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
6157 case X86::AVX_SET0: {
6158 assert(HasAVX && "AVX not supported");
6159     const TargetRegisterInfo *TRI = &getRegisterInfo();
6160     Register SrcReg = MIB.getReg(0);
6161 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
6162 MIB->getOperand(0).setReg(XReg);
6163 Expand2AddrUndef(MIB, get(X86::VXORPSrr));
6164 MIB.addReg(SrcReg, RegState::ImplicitDefine);
6165 return true;
6166 }
6167 case X86::AVX512_128_SET0:
6168 case X86::AVX512_FsFLD0SH:
6169 case X86::AVX512_FsFLD0SS:
6170 case X86::AVX512_FsFLD0SD:
6171 case X86::AVX512_FsFLD0F128: {
6172 bool HasVLX = Subtarget.hasVLX();
6173 Register SrcReg = MIB.getReg(0);
6174     const TargetRegisterInfo *TRI = &getRegisterInfo();
6175     if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
6176 return Expand2AddrUndef(MIB,
6177 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
6178 // Extended register without VLX. Use a larger XOR.
6179 SrcReg =
6180 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
6181 MIB->getOperand(0).setReg(SrcReg);
6182 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
6183 }
6184 case X86::AVX512_256_SET0:
6185 case X86::AVX512_512_SET0: {
6186 bool HasVLX = Subtarget.hasVLX();
6187 Register SrcReg = MIB.getReg(0);
6188     const TargetRegisterInfo *TRI = &getRegisterInfo();
6189     if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
6190 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
6191 MIB->getOperand(0).setReg(XReg);
6192 Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
6193 MIB.addReg(SrcReg, RegState::ImplicitDefine);
6194 return true;
6195 }
6196 if (MI.getOpcode() == X86::AVX512_256_SET0) {
6197 // No VLX so we must reference a zmm.
6198 unsigned ZReg =
6199 TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
6200 MIB->getOperand(0).setReg(ZReg);
6201 }
6202 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
6203 }
6204 case X86::V_SETALLONES:
6205 return Expand2AddrUndef(MIB,
6206 get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
6207 case X86::AVX2_SETALLONES:
6208 return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
6209 case X86::AVX1_SETALLONES: {
6210 Register Reg = MIB.getReg(0);
6211 // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
6212 MIB->setDesc(get(X86::VCMPPSYrri));
6213 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
6214 return true;
6215 }
6216 case X86::AVX512_512_SETALLONES: {
6217 Register Reg = MIB.getReg(0);
6218 MIB->setDesc(get(X86::VPTERNLOGDZrri));
6219 // VPTERNLOGD needs 3 register inputs and an immediate.
6220 // 0xff will return 1s for any input.
6221 MIB.addReg(Reg, RegState::Undef)
6222 .addReg(Reg, RegState::Undef)
6223 .addReg(Reg, RegState::Undef)
6224 .addImm(0xff);
6225 return true;
6226 }
6227 case X86::AVX512_512_SEXT_MASK_32:
6228 case X86::AVX512_512_SEXT_MASK_64: {
6229 Register Reg = MIB.getReg(0);
6230 Register MaskReg = MIB.getReg(1);
6231 unsigned MaskState = getRegState(MIB->getOperand(1));
6232 unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64)
6233 ? X86::VPTERNLOGQZrrikz
6234 : X86::VPTERNLOGDZrrikz;
6235 MI.removeOperand(1);
6236 MIB->setDesc(get(Opc));
6237 // VPTERNLOG needs 3 register inputs and an immediate.
6238 // 0xff will return 1s for any input.
6239 MIB.addReg(Reg, RegState::Undef)
6240 .addReg(MaskReg, MaskState)
6241 .addReg(Reg, RegState::Undef)
6242 .addReg(Reg, RegState::Undef)
6243 .addImm(0xff);
6244 return true;
6245 }
6246 case X86::VMOVAPSZ128rm_NOVLX:
6247 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
6248 get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
6249 case X86::VMOVUPSZ128rm_NOVLX:
6250 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
6251 get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
6252 case X86::VMOVAPSZ256rm_NOVLX:
6253 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
6254 get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
6255 case X86::VMOVUPSZ256rm_NOVLX:
6256 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
6257 get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
6258 case X86::VMOVAPSZ128mr_NOVLX:
6259 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
6260 get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
6261 case X86::VMOVUPSZ128mr_NOVLX:
6262 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
6263 get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
6264 case X86::VMOVAPSZ256mr_NOVLX:
6265 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
6266 get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
6267 case X86::VMOVUPSZ256mr_NOVLX:
6268 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
6269 get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
6270 case X86::MOV32ri64: {
6271 Register Reg = MIB.getReg(0);
6272 Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
6273 MI.setDesc(get(X86::MOV32ri));
6274 MIB->getOperand(0).setReg(Reg32);
6275     MIB.addReg(Reg, RegState::ImplicitDefine);
6276     return true;
6277 }
6278
6279 case X86::RDFLAGS32:
6280 case X86::RDFLAGS64: {
6281 unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
6282 MachineBasicBlock &MBB = *MIB->getParent();
6283
6284 MachineInstr *NewMI = BuildMI(MBB, MI, MIB->getDebugLoc(),
6285 get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
6286 .getInstr();
6287
6288 // Permit reads of the EFLAGS and DF registers without them being defined.
6289 // This intrinsic exists to read external processor state in flags, such as
6290 // the trap flag, interrupt flag, and direction flag, none of which are
6291 // modeled by the backend.
6292 assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
6293 "Unexpected register in operand! Should be EFLAGS.");
6294 NewMI->getOperand(2).setIsUndef();
6295 assert(NewMI->getOperand(3).getReg() == X86::DF &&
6296 "Unexpected register in operand! Should be DF.");
6297 NewMI->getOperand(3).setIsUndef();
6298
6299 MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
6300 return true;
6301 }
6302
6303 case X86::WRFLAGS32:
6304 case X86::WRFLAGS64: {
6305 unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
6306 MachineBasicBlock &MBB = *MIB->getParent();
6307
6308 BuildMI(MBB, MI, MIB->getDebugLoc(),
6309 get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
6310 .addReg(MI.getOperand(0).getReg());
6311 BuildMI(MBB, MI, MIB->getDebugLoc(),
6312 get(Is64Bit ? X86::POPF64 : X86::POPF32));
6313 MI.eraseFromParent();
6314 return true;
6315 }
6316
6317 // KNL does not recognize dependency-breaking idioms for mask registers,
6318 // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
6319 // Using %k0 as the undef input register is a performance heuristic based
6320 // on the assumption that %k0 is used less frequently than the other mask
6321 // registers, since it is not usable as a write mask.
6322 // FIXME: A more advanced approach would be to choose the best input mask
6323 // register based on context.
6324 case X86::KSET0W:
6325 return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
6326 case X86::KSET0D:
6327 return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
6328 case X86::KSET0Q:
6329 return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
6330 case X86::KSET1W:
6331 return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
6332 case X86::KSET1D:
6333 return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
6334 case X86::KSET1Q:
6335 return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
6336 case TargetOpcode::LOAD_STACK_GUARD:
6337 expandLoadStackGuard(MIB, *this);
6338 return true;
6339 case X86::XOR64_FP:
6340 case X86::XOR32_FP:
6341 return expandXorFP(MIB, *this);
6342 case X86::SHLDROT32ri:
6343 return expandSHXDROT(MIB, get(X86::SHLD32rri8));
6344 case X86::SHLDROT64ri:
6345 return expandSHXDROT(MIB, get(X86::SHLD64rri8));
6346 case X86::SHRDROT32ri:
6347 return expandSHXDROT(MIB, get(X86::SHRD32rri8));
6348 case X86::SHRDROT64ri:
6349 return expandSHXDROT(MIB, get(X86::SHRD64rri8));
6350 case X86::ADD8rr_DB:
6351 MIB->setDesc(get(X86::OR8rr));
6352 break;
6353 case X86::ADD16rr_DB:
6354 MIB->setDesc(get(X86::OR16rr));
6355 break;
6356 case X86::ADD32rr_DB:
6357 MIB->setDesc(get(X86::OR32rr));
6358 break;
6359 case X86::ADD64rr_DB:
6360 MIB->setDesc(get(X86::OR64rr));
6361 break;
6362 case X86::ADD8ri_DB:
6363 MIB->setDesc(get(X86::OR8ri));
6364 break;
6365 case X86::ADD16ri_DB:
6366 MIB->setDesc(get(X86::OR16ri));
6367 break;
6368 case X86::ADD32ri_DB:
6369 MIB->setDesc(get(X86::OR32ri));
6370 break;
6371 case X86::ADD64ri32_DB:
6372 MIB->setDesc(get(X86::OR64ri32));
6373 break;
6374 }
6375 return false;
6376}
6377
6378 /// Return true for all instructions that only update
6379 /// the first 32 or 64 bits of the destination register and leave the rest
6380 /// unmodified. This can be used to avoid folding loads if the instructions
6381 /// only update part of the destination register, and the non-updated part is
6382 /// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
6383 /// instructions breaks the partial register dependency and can improve
6384 /// performance. e.g.:
6385///
6386/// movss (%rdi), %xmm0
6387/// cvtss2sd %xmm0, %xmm0
6388///
6389/// Instead of
6390/// cvtss2sd (%rdi), %xmm0
6391///
6392/// FIXME: This should be turned into a TSFlags.
6393///
6394static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget,
6395 bool ForLoadFold = false) {
6396 switch (Opcode) {
6397 case X86::CVTSI2SSrr:
6398 case X86::CVTSI2SSrm:
6399 case X86::CVTSI642SSrr:
6400 case X86::CVTSI642SSrm:
6401 case X86::CVTSI2SDrr:
6402 case X86::CVTSI2SDrm:
6403 case X86::CVTSI642SDrr:
6404 case X86::CVTSI642SDrm:
6405     // Load folding won't affect the undef register update since the input is
6406 // a GPR.
6407 return !ForLoadFold;
6408 case X86::CVTSD2SSrr:
6409 case X86::CVTSD2SSrm:
6410 case X86::CVTSS2SDrr:
6411 case X86::CVTSS2SDrm:
6412 case X86::MOVHPDrm:
6413 case X86::MOVHPSrm:
6414 case X86::MOVLPDrm:
6415 case X86::MOVLPSrm:
6416 case X86::RCPSSr:
6417 case X86::RCPSSm:
6418 case X86::RCPSSr_Int:
6419 case X86::RCPSSm_Int:
6420 case X86::ROUNDSDri:
6421 case X86::ROUNDSDmi:
6422 case X86::ROUNDSSri:
6423 case X86::ROUNDSSmi:
6424 case X86::RSQRTSSr:
6425 case X86::RSQRTSSm:
6426 case X86::RSQRTSSr_Int:
6427 case X86::RSQRTSSm_Int:
6428 case X86::SQRTSSr:
6429 case X86::SQRTSSm:
6430 case X86::SQRTSSr_Int:
6431 case X86::SQRTSSm_Int:
6432 case X86::SQRTSDr:
6433 case X86::SQRTSDm:
6434 case X86::SQRTSDr_Int:
6435 case X86::SQRTSDm_Int:
6436 return true;
6437 case X86::VFCMULCPHZ128rm:
6438 case X86::VFCMULCPHZ128rmb:
6439 case X86::VFCMULCPHZ128rmbkz:
6440 case X86::VFCMULCPHZ128rmkz:
6441 case X86::VFCMULCPHZ128rr:
6442 case X86::VFCMULCPHZ128rrkz:
6443 case X86::VFCMULCPHZ256rm:
6444 case X86::VFCMULCPHZ256rmb:
6445 case X86::VFCMULCPHZ256rmbkz:
6446 case X86::VFCMULCPHZ256rmkz:
6447 case X86::VFCMULCPHZ256rr:
6448 case X86::VFCMULCPHZ256rrkz:
6449 case X86::VFCMULCPHZrm:
6450 case X86::VFCMULCPHZrmb:
6451 case X86::VFCMULCPHZrmbkz:
6452 case X86::VFCMULCPHZrmkz:
6453 case X86::VFCMULCPHZrr:
6454 case X86::VFCMULCPHZrrb:
6455 case X86::VFCMULCPHZrrbkz:
6456 case X86::VFCMULCPHZrrkz:
6457 case X86::VFMULCPHZ128rm:
6458 case X86::VFMULCPHZ128rmb:
6459 case X86::VFMULCPHZ128rmbkz:
6460 case X86::VFMULCPHZ128rmkz:
6461 case X86::VFMULCPHZ128rr:
6462 case X86::VFMULCPHZ128rrkz:
6463 case X86::VFMULCPHZ256rm:
6464 case X86::VFMULCPHZ256rmb:
6465 case X86::VFMULCPHZ256rmbkz:
6466 case X86::VFMULCPHZ256rmkz:
6467 case X86::VFMULCPHZ256rr:
6468 case X86::VFMULCPHZ256rrkz:
6469 case X86::VFMULCPHZrm:
6470 case X86::VFMULCPHZrmb:
6471 case X86::VFMULCPHZrmbkz:
6472 case X86::VFMULCPHZrmkz:
6473 case X86::VFMULCPHZrr:
6474 case X86::VFMULCPHZrrb:
6475 case X86::VFMULCPHZrrbkz:
6476 case X86::VFMULCPHZrrkz:
6477 case X86::VFCMULCSHZrm:
6478 case X86::VFCMULCSHZrmkz:
6479 case X86::VFCMULCSHZrr:
6480 case X86::VFCMULCSHZrrb:
6481 case X86::VFCMULCSHZrrbkz:
6482 case X86::VFCMULCSHZrrkz:
6483 case X86::VFMULCSHZrm:
6484 case X86::VFMULCSHZrmkz:
6485 case X86::VFMULCSHZrr:
6486 case X86::VFMULCSHZrrb:
6487 case X86::VFMULCSHZrrbkz:
6488 case X86::VFMULCSHZrrkz:
6489 return Subtarget.hasMULCFalseDeps();
6490 case X86::VPERMDYrm:
6491 case X86::VPERMDYrr:
6492 case X86::VPERMQYmi:
6493 case X86::VPERMQYri:
6494 case X86::VPERMPSYrm:
6495 case X86::VPERMPSYrr:
6496 case X86::VPERMPDYmi:
6497 case X86::VPERMPDYri:
6498 case X86::VPERMDZ256rm:
6499 case X86::VPERMDZ256rmb:
6500 case X86::VPERMDZ256rmbkz:
6501 case X86::VPERMDZ256rmkz:
6502 case X86::VPERMDZ256rr:
6503 case X86::VPERMDZ256rrkz:
6504 case X86::VPERMDZrm:
6505 case X86::VPERMDZrmb:
6506 case X86::VPERMDZrmbkz:
6507 case X86::VPERMDZrmkz:
6508 case X86::VPERMDZrr:
6509 case X86::VPERMDZrrkz:
6510 case X86::VPERMQZ256mbi:
6511 case X86::VPERMQZ256mbikz:
6512 case X86::VPERMQZ256mi:
6513 case X86::VPERMQZ256mikz:
6514 case X86::VPERMQZ256ri:
6515 case X86::VPERMQZ256rikz:
6516 case X86::VPERMQZ256rm:
6517 case X86::VPERMQZ256rmb:
6518 case X86::VPERMQZ256rmbkz:
6519 case X86::VPERMQZ256rmkz:
6520 case X86::VPERMQZ256rr:
6521 case X86::VPERMQZ256rrkz:
6522 case X86::VPERMQZmbi:
6523 case X86::VPERMQZmbikz:
6524 case X86::VPERMQZmi:
6525 case X86::VPERMQZmikz:
6526 case X86::VPERMQZri:
6527 case X86::VPERMQZrikz:
6528 case X86::VPERMQZrm:
6529 case X86::VPERMQZrmb:
6530 case X86::VPERMQZrmbkz:
6531 case X86::VPERMQZrmkz:
6532 case X86::VPERMQZrr:
6533 case X86::VPERMQZrrkz:
6534 case X86::VPERMPSZ256rm:
6535 case X86::VPERMPSZ256rmb:
6536 case X86::VPERMPSZ256rmbkz:
6537 case X86::VPERMPSZ256rmkz:
6538 case X86::VPERMPSZ256rr:
6539 case X86::VPERMPSZ256rrkz:
6540 case X86::VPERMPSZrm:
6541 case X86::VPERMPSZrmb:
6542 case X86::VPERMPSZrmbkz:
6543 case X86::VPERMPSZrmkz:
6544 case X86::VPERMPSZrr:
6545 case X86::VPERMPSZrrkz:
6546 case X86::VPERMPDZ256mbi:
6547 case X86::VPERMPDZ256mbikz:
6548 case X86::VPERMPDZ256mi:
6549 case X86::VPERMPDZ256mikz:
6550 case X86::VPERMPDZ256ri:
6551 case X86::VPERMPDZ256rikz:
6552 case X86::VPERMPDZ256rm:
6553 case X86::VPERMPDZ256rmb:
6554 case X86::VPERMPDZ256rmbkz:
6555 case X86::VPERMPDZ256rmkz:
6556 case X86::VPERMPDZ256rr:
6557 case X86::VPERMPDZ256rrkz:
6558 case X86::VPERMPDZmbi:
6559 case X86::VPERMPDZmbikz:
6560 case X86::VPERMPDZmi:
6561 case X86::VPERMPDZmikz:
6562 case X86::VPERMPDZri:
6563 case X86::VPERMPDZrikz:
6564 case X86::VPERMPDZrm:
6565 case X86::VPERMPDZrmb:
6566 case X86::VPERMPDZrmbkz:
6567 case X86::VPERMPDZrmkz:
6568 case X86::VPERMPDZrr:
6569 case X86::VPERMPDZrrkz:
6570 return Subtarget.hasPERMFalseDeps();
6571 case X86::VRANGEPDZ128rmbi:
6572 case X86::VRANGEPDZ128rmbikz:
6573 case X86::VRANGEPDZ128rmi:
6574 case X86::VRANGEPDZ128rmikz:
6575 case X86::VRANGEPDZ128rri:
6576 case X86::VRANGEPDZ128rrikz:
6577 case X86::VRANGEPDZ256rmbi:
6578 case X86::VRANGEPDZ256rmbikz:
6579 case X86::VRANGEPDZ256rmi:
6580 case X86::VRANGEPDZ256rmikz:
6581 case X86::VRANGEPDZ256rri:
6582 case X86::VRANGEPDZ256rrikz:
6583 case X86::VRANGEPDZrmbi:
6584 case X86::VRANGEPDZrmbikz:
6585 case X86::VRANGEPDZrmi:
6586 case X86::VRANGEPDZrmikz:
6587 case X86::VRANGEPDZrri:
6588 case X86::VRANGEPDZrrib:
6589 case X86::VRANGEPDZrribkz:
6590 case X86::VRANGEPDZrrikz:
6591 case X86::VRANGEPSZ128rmbi:
6592 case X86::VRANGEPSZ128rmbikz:
6593 case X86::VRANGEPSZ128rmi:
6594 case X86::VRANGEPSZ128rmikz:
6595 case X86::VRANGEPSZ128rri:
6596 case X86::VRANGEPSZ128rrikz:
6597 case X86::VRANGEPSZ256rmbi:
6598 case X86::VRANGEPSZ256rmbikz:
6599 case X86::VRANGEPSZ256rmi:
6600 case X86::VRANGEPSZ256rmikz:
6601 case X86::VRANGEPSZ256rri:
6602 case X86::VRANGEPSZ256rrikz:
6603 case X86::VRANGEPSZrmbi:
6604 case X86::VRANGEPSZrmbikz:
6605 case X86::VRANGEPSZrmi:
6606 case X86::VRANGEPSZrmikz:
6607 case X86::VRANGEPSZrri:
6608 case X86::VRANGEPSZrrib:
6609 case X86::VRANGEPSZrribkz:
6610 case X86::VRANGEPSZrrikz:
6611 case X86::VRANGESDZrmi:
6612 case X86::VRANGESDZrmikz:
6613 case X86::VRANGESDZrri:
6614 case X86::VRANGESDZrrib:
6615 case X86::VRANGESDZrribkz:
6616 case X86::VRANGESDZrrikz:
6617 case X86::VRANGESSZrmi:
6618 case X86::VRANGESSZrmikz:
6619 case X86::VRANGESSZrri:
6620 case X86::VRANGESSZrrib:
6621 case X86::VRANGESSZrribkz:
6622 case X86::VRANGESSZrrikz:
6623 return Subtarget.hasRANGEFalseDeps();
6624 case X86::VGETMANTSSZrmi:
6625 case X86::VGETMANTSSZrmikz:
6626 case X86::VGETMANTSSZrri:
6627 case X86::VGETMANTSSZrrib:
6628 case X86::VGETMANTSSZrribkz:
6629 case X86::VGETMANTSSZrrikz:
6630 case X86::VGETMANTSDZrmi:
6631 case X86::VGETMANTSDZrmikz:
6632 case X86::VGETMANTSDZrri:
6633 case X86::VGETMANTSDZrrib:
6634 case X86::VGETMANTSDZrribkz:
6635 case X86::VGETMANTSDZrrikz:
6636 case X86::VGETMANTSHZrmi:
6637 case X86::VGETMANTSHZrmikz:
6638 case X86::VGETMANTSHZrri:
6639 case X86::VGETMANTSHZrrib:
6640 case X86::VGETMANTSHZrribkz:
6641 case X86::VGETMANTSHZrrikz:
6642 case X86::VGETMANTPSZ128rmbi:
6643 case X86::VGETMANTPSZ128rmbikz:
6644 case X86::VGETMANTPSZ128rmi:
6645 case X86::VGETMANTPSZ128rmikz:
6646 case X86::VGETMANTPSZ256rmbi:
6647 case X86::VGETMANTPSZ256rmbikz:
6648 case X86::VGETMANTPSZ256rmi:
6649 case X86::VGETMANTPSZ256rmikz:
6650 case X86::VGETMANTPSZrmbi:
6651 case X86::VGETMANTPSZrmbikz:
6652 case X86::VGETMANTPSZrmi:
6653 case X86::VGETMANTPSZrmikz:
6654 case X86::VGETMANTPDZ128rmbi:
6655 case X86::VGETMANTPDZ128rmbikz:
6656 case X86::VGETMANTPDZ128rmi:
6657 case X86::VGETMANTPDZ128rmikz:
6658 case X86::VGETMANTPDZ256rmbi:
6659 case X86::VGETMANTPDZ256rmbikz:
6660 case X86::VGETMANTPDZ256rmi:
6661 case X86::VGETMANTPDZ256rmikz:
6662 case X86::VGETMANTPDZrmbi:
6663 case X86::VGETMANTPDZrmbikz:
6664 case X86::VGETMANTPDZrmi:
6665 case X86::VGETMANTPDZrmikz:
6666 return Subtarget.hasGETMANTFalseDeps();
6667 case X86::VPMULLQZ128rm:
6668 case X86::VPMULLQZ128rmb:
6669 case X86::VPMULLQZ128rmbkz:
6670 case X86::VPMULLQZ128rmkz:
6671 case X86::VPMULLQZ128rr:
6672 case X86::VPMULLQZ128rrkz:
6673 case X86::VPMULLQZ256rm:
6674 case X86::VPMULLQZ256rmb:
6675 case X86::VPMULLQZ256rmbkz:
6676 case X86::VPMULLQZ256rmkz:
6677 case X86::VPMULLQZ256rr:
6678 case X86::VPMULLQZ256rrkz:
6679 case X86::VPMULLQZrm:
6680 case X86::VPMULLQZrmb:
6681 case X86::VPMULLQZrmbkz:
6682 case X86::VPMULLQZrmkz:
6683 case X86::VPMULLQZrr:
6684 case X86::VPMULLQZrrkz:
6685 return Subtarget.hasMULLQFalseDeps();
6686 // GPR
6687 case X86::POPCNT32rm:
6688 case X86::POPCNT32rr:
6689 case X86::POPCNT64rm:
6690 case X86::POPCNT64rr:
6691 return Subtarget.hasPOPCNTFalseDeps();
6692 case X86::LZCNT32rm:
6693 case X86::LZCNT32rr:
6694 case X86::LZCNT64rm:
6695 case X86::LZCNT64rr:
6696 case X86::TZCNT32rm:
6697 case X86::TZCNT32rr:
6698 case X86::TZCNT64rm:
6699 case X86::TZCNT64rr:
6700 return Subtarget.hasLZCNTFalseDeps();
6701 }
6702
6703 return false;
6704}
6705
6706/// Inform the BreakFalseDeps pass how many idle
6707/// instructions we would like before a partial register update.
6708 unsigned X86InstrInfo::getPartialRegUpdateClearance(
6709     const MachineInstr &MI, unsigned OpNum,
6710 const TargetRegisterInfo *TRI) const {
6711 if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget))
6712 return 0;
6713
6714 // If MI is marked as reading Reg, the partial register update is wanted.
6715 const MachineOperand &MO = MI.getOperand(0);
6716 Register Reg = MO.getReg();
6717 if (Reg.isVirtual()) {
6718 if (MO.readsReg() || MI.readsVirtualRegister(Reg))
6719 return 0;
6720 } else {
6721 if (MI.readsRegister(Reg, TRI))
6722 return 0;
6723 }
6724
6725 // If any instructions in the clearance range are reading Reg, insert a
6726 // dependency breaking instruction, which is inexpensive and is likely to
6727   // be hidden in other instructions' cycles.
6728   return PartialRegUpdateClearance;
6729}
6730
6731 // Return true for any instruction that copies the high bits of the first source
6732// operand into the unused high bits of the destination operand.
6733// Also returns true for instructions that have two inputs where one may
6734// be undef and we want it to use the same register as the other input.
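// For example (illustrative), in 'vcvtsi2sdq %rax, undef %xmm0, %xmm14' the
// pass-through operand %xmm0 is undef, so BreakFalseDeps can either pick a
// register that is known to be ready or insert a dependency-breaking xor.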
6735static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
6736 bool ForLoadFold = false) {
6737 // Set the OpNum parameter to the first source operand.
6738 switch (Opcode) {
6739 case X86::MMX_PUNPCKHBWrr:
6740 case X86::MMX_PUNPCKHWDrr:
6741 case X86::MMX_PUNPCKHDQrr:
6742 case X86::MMX_PUNPCKLBWrr:
6743 case X86::MMX_PUNPCKLWDrr:
6744 case X86::MMX_PUNPCKLDQrr:
6745 case X86::MOVHLPSrr:
6746 case X86::PACKSSWBrr:
6747 case X86::PACKUSWBrr:
6748 case X86::PACKSSDWrr:
6749 case X86::PACKUSDWrr:
6750 case X86::PUNPCKHBWrr:
6751 case X86::PUNPCKLBWrr:
6752 case X86::PUNPCKHWDrr:
6753 case X86::PUNPCKLWDrr:
6754 case X86::PUNPCKHDQrr:
6755 case X86::PUNPCKLDQrr:
6756 case X86::PUNPCKHQDQrr:
6757 case X86::PUNPCKLQDQrr:
6758 case X86::SHUFPDrri:
6759 case X86::SHUFPSrri:
6760 // These instructions are sometimes used with an undef first or second
6761 // source. Return true here so BreakFalseDeps will assign this source to the
6762 // same register as the first source to avoid a false dependency.
6763 // Operand 1 of these instructions is tied so they're separate from their
6764 // VEX counterparts.
6765 return OpNum == 2 && !ForLoadFold;
6766
6767 case X86::VMOVLHPSrr:
6768 case X86::VMOVLHPSZrr:
6769 case X86::VPACKSSWBrr:
6770 case X86::VPACKUSWBrr:
6771 case X86::VPACKSSDWrr:
6772 case X86::VPACKUSDWrr:
6773 case X86::VPACKSSWBZ128rr:
6774 case X86::VPACKUSWBZ128rr:
6775 case X86::VPACKSSDWZ128rr:
6776 case X86::VPACKUSDWZ128rr:
6777 case X86::VPERM2F128rr:
6778 case X86::VPERM2I128rr:
6779 case X86::VSHUFF32X4Z256rri:
6780 case X86::VSHUFF32X4Zrri:
6781 case X86::VSHUFF64X2Z256rri:
6782 case X86::VSHUFF64X2Zrri:
6783 case X86::VSHUFI32X4Z256rri:
6784 case X86::VSHUFI32X4Zrri:
6785 case X86::VSHUFI64X2Z256rri:
6786 case X86::VSHUFI64X2Zrri:
6787 case X86::VPUNPCKHBWrr:
6788 case X86::VPUNPCKLBWrr:
6789 case X86::VPUNPCKHBWYrr:
6790 case X86::VPUNPCKLBWYrr:
6791 case X86::VPUNPCKHBWZ128rr:
6792 case X86::VPUNPCKLBWZ128rr:
6793 case X86::VPUNPCKHBWZ256rr:
6794 case X86::VPUNPCKLBWZ256rr:
6795 case X86::VPUNPCKHBWZrr:
6796 case X86::VPUNPCKLBWZrr:
6797 case X86::VPUNPCKHWDrr:
6798 case X86::VPUNPCKLWDrr:
6799 case X86::VPUNPCKHWDYrr:
6800 case X86::VPUNPCKLWDYrr:
6801 case X86::VPUNPCKHWDZ128rr:
6802 case X86::VPUNPCKLWDZ128rr:
6803 case X86::VPUNPCKHWDZ256rr:
6804 case X86::VPUNPCKLWDZ256rr:
6805 case X86::VPUNPCKHWDZrr:
6806 case X86::VPUNPCKLWDZrr:
6807 case X86::VPUNPCKHDQrr:
6808 case X86::VPUNPCKLDQrr:
6809 case X86::VPUNPCKHDQYrr:
6810 case X86::VPUNPCKLDQYrr:
6811 case X86::VPUNPCKHDQZ128rr:
6812 case X86::VPUNPCKLDQZ128rr:
6813 case X86::VPUNPCKHDQZ256rr:
6814 case X86::VPUNPCKLDQZ256rr:
6815 case X86::VPUNPCKHDQZrr:
6816 case X86::VPUNPCKLDQZrr:
6817 case X86::VPUNPCKHQDQrr:
6818 case X86::VPUNPCKLQDQrr:
6819 case X86::VPUNPCKHQDQYrr:
6820 case X86::VPUNPCKLQDQYrr:
6821 case X86::VPUNPCKHQDQZ128rr:
6822 case X86::VPUNPCKLQDQZ128rr:
6823 case X86::VPUNPCKHQDQZ256rr:
6824 case X86::VPUNPCKLQDQZ256rr:
6825 case X86::VPUNPCKHQDQZrr:
6826 case X86::VPUNPCKLQDQZrr:
6827 // These instructions are sometimes used with an undef first or second
6828 // source. Return true here so BreakFalseDeps will assign this source to the
6829 // same register as the first source to avoid a false dependency.
6830 return (OpNum == 1 || OpNum == 2) && !ForLoadFold;
6831
6832 case X86::VCVTSI2SSrr:
6833 case X86::VCVTSI2SSrm:
6834 case X86::VCVTSI2SSrr_Int:
6835 case X86::VCVTSI2SSrm_Int:
6836 case X86::VCVTSI642SSrr:
6837 case X86::VCVTSI642SSrm:
6838 case X86::VCVTSI642SSrr_Int:
6839 case X86::VCVTSI642SSrm_Int:
6840 case X86::VCVTSI2SDrr:
6841 case X86::VCVTSI2SDrm:
6842 case X86::VCVTSI2SDrr_Int:
6843 case X86::VCVTSI2SDrm_Int:
6844 case X86::VCVTSI642SDrr:
6845 case X86::VCVTSI642SDrm:
6846 case X86::VCVTSI642SDrr_Int:
6847 case X86::VCVTSI642SDrm_Int:
6848 // AVX-512
6849 case X86::VCVTSI2SSZrr:
6850 case X86::VCVTSI2SSZrm:
6851 case X86::VCVTSI2SSZrr_Int:
6852 case X86::VCVTSI2SSZrrb_Int:
6853 case X86::VCVTSI2SSZrm_Int:
6854 case X86::VCVTSI642SSZrr:
6855 case X86::VCVTSI642SSZrm:
6856 case X86::VCVTSI642SSZrr_Int:
6857 case X86::VCVTSI642SSZrrb_Int:
6858 case X86::VCVTSI642SSZrm_Int:
6859 case X86::VCVTSI2SDZrr:
6860 case X86::VCVTSI2SDZrm:
6861 case X86::VCVTSI2SDZrr_Int:
6862 case X86::VCVTSI2SDZrm_Int:
6863 case X86::VCVTSI642SDZrr:
6864 case X86::VCVTSI642SDZrm:
6865 case X86::VCVTSI642SDZrr_Int:
6866 case X86::VCVTSI642SDZrrb_Int:
6867 case X86::VCVTSI642SDZrm_Int:
6868 case X86::VCVTUSI2SSZrr:
6869 case X86::VCVTUSI2SSZrm:
6870 case X86::VCVTUSI2SSZrr_Int:
6871 case X86::VCVTUSI2SSZrrb_Int:
6872 case X86::VCVTUSI2SSZrm_Int:
6873 case X86::VCVTUSI642SSZrr:
6874 case X86::VCVTUSI642SSZrm:
6875 case X86::VCVTUSI642SSZrr_Int:
6876 case X86::VCVTUSI642SSZrrb_Int:
6877 case X86::VCVTUSI642SSZrm_Int:
6878 case X86::VCVTUSI2SDZrr:
6879 case X86::VCVTUSI2SDZrm:
6880 case X86::VCVTUSI2SDZrr_Int:
6881 case X86::VCVTUSI2SDZrm_Int:
6882 case X86::VCVTUSI642SDZrr:
6883 case X86::VCVTUSI642SDZrm:
6884 case X86::VCVTUSI642SDZrr_Int:
6885 case X86::VCVTUSI642SDZrrb_Int:
6886 case X86::VCVTUSI642SDZrm_Int:
6887 case X86::VCVTSI2SHZrr:
6888 case X86::VCVTSI2SHZrm:
6889 case X86::VCVTSI2SHZrr_Int:
6890 case X86::VCVTSI2SHZrrb_Int:
6891 case X86::VCVTSI2SHZrm_Int:
6892 case X86::VCVTSI642SHZrr:
6893 case X86::VCVTSI642SHZrm:
6894 case X86::VCVTSI642SHZrr_Int:
6895 case X86::VCVTSI642SHZrrb_Int:
6896 case X86::VCVTSI642SHZrm_Int:
6897 case X86::VCVTUSI2SHZrr:
6898 case X86::VCVTUSI2SHZrm:
6899 case X86::VCVTUSI2SHZrr_Int:
6900 case X86::VCVTUSI2SHZrrb_Int:
6901 case X86::VCVTUSI2SHZrm_Int:
6902 case X86::VCVTUSI642SHZrr:
6903 case X86::VCVTUSI642SHZrm:
6904 case X86::VCVTUSI642SHZrr_Int:
6905 case X86::VCVTUSI642SHZrrb_Int:
6906 case X86::VCVTUSI642SHZrm_Int:
6907     // Load folding won't affect the undef register update since the input is
6908 // a GPR.
6909 return OpNum == 1 && !ForLoadFold;
6910 case X86::VCVTSD2SSrr:
6911 case X86::VCVTSD2SSrm:
6912 case X86::VCVTSD2SSrr_Int:
6913 case X86::VCVTSD2SSrm_Int:
6914 case X86::VCVTSS2SDrr:
6915 case X86::VCVTSS2SDrm:
6916 case X86::VCVTSS2SDrr_Int:
6917 case X86::VCVTSS2SDrm_Int:
6918 case X86::VRCPSSr:
6919 case X86::VRCPSSr_Int:
6920 case X86::VRCPSSm:
6921 case X86::VRCPSSm_Int:
6922 case X86::VROUNDSDri:
6923 case X86::VROUNDSDmi:
6924 case X86::VROUNDSDri_Int:
6925 case X86::VROUNDSDmi_Int:
6926 case X86::VROUNDSSri:
6927 case X86::VROUNDSSmi:
6928 case X86::VROUNDSSri_Int:
6929 case X86::VROUNDSSmi_Int:
6930 case X86::VRSQRTSSr:
6931 case X86::VRSQRTSSr_Int:
6932 case X86::VRSQRTSSm:
6933 case X86::VRSQRTSSm_Int:
6934 case X86::VSQRTSSr:
6935 case X86::VSQRTSSr_Int:
6936 case X86::VSQRTSSm:
6937 case X86::VSQRTSSm_Int:
6938 case X86::VSQRTSDr:
6939 case X86::VSQRTSDr_Int:
6940 case X86::VSQRTSDm:
6941 case X86::VSQRTSDm_Int:
6942 // AVX-512
6943 case X86::VCVTSD2SSZrr:
6944 case X86::VCVTSD2SSZrr_Int:
6945 case X86::VCVTSD2SSZrrb_Int:
6946 case X86::VCVTSD2SSZrm:
6947 case X86::VCVTSD2SSZrm_Int:
6948 case X86::VCVTSS2SDZrr:
6949 case X86::VCVTSS2SDZrr_Int:
6950 case X86::VCVTSS2SDZrrb_Int:
6951 case X86::VCVTSS2SDZrm:
6952 case X86::VCVTSS2SDZrm_Int:
6953 case X86::VGETEXPSDZr:
6954 case X86::VGETEXPSDZrb:
6955 case X86::VGETEXPSDZm:
6956 case X86::VGETEXPSSZr:
6957 case X86::VGETEXPSSZrb:
6958 case X86::VGETEXPSSZm:
6959 case X86::VGETMANTSDZrri:
6960 case X86::VGETMANTSDZrrib:
6961 case X86::VGETMANTSDZrmi:
6962 case X86::VGETMANTSSZrri:
6963 case X86::VGETMANTSSZrrib:
6964 case X86::VGETMANTSSZrmi:
6965 case X86::VRNDSCALESDZr:
6966 case X86::VRNDSCALESDZr_Int:
6967 case X86::VRNDSCALESDZrb_Int:
6968 case X86::VRNDSCALESDZm:
6969 case X86::VRNDSCALESDZm_Int:
6970 case X86::VRNDSCALESSZr:
6971 case X86::VRNDSCALESSZr_Int:
6972 case X86::VRNDSCALESSZrb_Int:
6973 case X86::VRNDSCALESSZm:
6974 case X86::VRNDSCALESSZm_Int:
6975 case X86::VRCP14SDZrr:
6976 case X86::VRCP14SDZrm:
6977 case X86::VRCP14SSZrr:
6978 case X86::VRCP14SSZrm:
6979 case X86::VRCPSHZrr:
6980 case X86::VRCPSHZrm:
6981 case X86::VRSQRTSHZrr:
6982 case X86::VRSQRTSHZrm:
6983 case X86::VREDUCESHZrmi:
6984 case X86::VREDUCESHZrri:
6985 case X86::VREDUCESHZrrib:
6986 case X86::VGETEXPSHZr:
6987 case X86::VGETEXPSHZrb:
6988 case X86::VGETEXPSHZm:
6989 case X86::VGETMANTSHZrri:
6990 case X86::VGETMANTSHZrrib:
6991 case X86::VGETMANTSHZrmi:
6992 case X86::VRNDSCALESHZr:
6993 case X86::VRNDSCALESHZr_Int:
6994 case X86::VRNDSCALESHZrb_Int:
6995 case X86::VRNDSCALESHZm:
6996 case X86::VRNDSCALESHZm_Int:
6997 case X86::VSQRTSHZr:
6998 case X86::VSQRTSHZr_Int:
6999 case X86::VSQRTSHZrb_Int:
7000 case X86::VSQRTSHZm:
7001 case X86::VSQRTSHZm_Int:
7002 case X86::VRCP28SDZr:
7003 case X86::VRCP28SDZrb:
7004 case X86::VRCP28SDZm:
7005 case X86::VRCP28SSZr:
7006 case X86::VRCP28SSZrb:
7007 case X86::VRCP28SSZm:
7008 case X86::VREDUCESSZrmi:
7009 case X86::VREDUCESSZrri:
7010 case X86::VREDUCESSZrrib:
7011 case X86::VRSQRT14SDZrr:
7012 case X86::VRSQRT14SDZrm:
7013 case X86::VRSQRT14SSZrr:
7014 case X86::VRSQRT14SSZrm:
7015 case X86::VRSQRT28SDZr:
7016 case X86::VRSQRT28SDZrb:
7017 case X86::VRSQRT28SDZm:
7018 case X86::VRSQRT28SSZr:
7019 case X86::VRSQRT28SSZrb:
7020 case X86::VRSQRT28SSZm:
7021 case X86::VSQRTSSZr:
7022 case X86::VSQRTSSZr_Int:
7023 case X86::VSQRTSSZrb_Int:
7024 case X86::VSQRTSSZm:
7025 case X86::VSQRTSSZm_Int:
7026 case X86::VSQRTSDZr:
7027 case X86::VSQRTSDZr_Int:
7028 case X86::VSQRTSDZrb_Int:
7029 case X86::VSQRTSDZm:
7030 case X86::VSQRTSDZm_Int:
7031 case X86::VCVTSD2SHZrr:
7032 case X86::VCVTSD2SHZrr_Int:
7033 case X86::VCVTSD2SHZrrb_Int:
7034 case X86::VCVTSD2SHZrm:
7035 case X86::VCVTSD2SHZrm_Int:
7036 case X86::VCVTSS2SHZrr:
7037 case X86::VCVTSS2SHZrr_Int:
7038 case X86::VCVTSS2SHZrrb_Int:
7039 case X86::VCVTSS2SHZrm:
7040 case X86::VCVTSS2SHZrm_Int:
7041 case X86::VCVTSH2SDZrr:
7042 case X86::VCVTSH2SDZrr_Int:
7043 case X86::VCVTSH2SDZrrb_Int:
7044 case X86::VCVTSH2SDZrm:
7045 case X86::VCVTSH2SDZrm_Int:
7046 case X86::VCVTSH2SSZrr:
7047 case X86::VCVTSH2SSZrr_Int:
7048 case X86::VCVTSH2SSZrrb_Int:
7049 case X86::VCVTSH2SSZrm:
7050 case X86::VCVTSH2SSZrm_Int:
7051 return OpNum == 1;
7052 case X86::VMOVSSZrrk:
7053 case X86::VMOVSDZrrk:
7054 return OpNum == 3 && !ForLoadFold;
7055 case X86::VMOVSSZrrkz:
7056 case X86::VMOVSDZrrkz:
7057 return OpNum == 2 && !ForLoadFold;
7058 }
7059
7060 return false;
7061}
7062
7063/// Inform the BreakFalseDeps pass how many idle instructions we would like
7064/// before certain undef register reads.
7065///
7066/// This catches the VCVTSI2SD family of instructions:
7067///
7068/// vcvtsi2sdq %rax, undef %xmm0, %xmm14
7069///
7070 /// We should be careful *not* to catch VXOR idioms, which are presumably
7071/// handled specially in the pipeline:
7072///
7073/// vxorps undef %xmm1, undef %xmm1, %xmm1
7074///
7075/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
7076/// high bits that are passed-through are not live.
7077unsigned
7078 X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
7079                                    const TargetRegisterInfo *TRI) const {
7080 const MachineOperand &MO = MI.getOperand(OpNum);
7081 if (MO.getReg().isPhysical() && hasUndefRegUpdate(MI.getOpcode(), OpNum))
7082 return UndefRegClearance;
7083
7084 return 0;
7085}
7086
7087 void X86InstrInfo::breakPartialRegDependency(
7088     MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
7089 Register Reg = MI.getOperand(OpNum).getReg();
7090 // If MI kills this register, the false dependence is already broken.
7091 if (MI.killsRegister(Reg, TRI))
7092 return;
7093
7094 if (X86::VR128RegClass.contains(Reg)) {
7095 // These instructions are all floating point domain, so xorps is the best
7096 // choice.
7097 unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
7098 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
7099 .addReg(Reg, RegState::Undef)
7100 .addReg(Reg, RegState::Undef);
7101 MI.addRegisterKilled(Reg, TRI, true);
7102 } else if (X86::VR256RegClass.contains(Reg)) {
7103 // Use vxorps to clear the full ymm register.
7104 // It wants to read and write the xmm sub-register.
7105 Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
7106 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
7107 .addReg(XReg, RegState::Undef)
7108 .addReg(XReg, RegState::Undef)
7109         .addReg(Reg, RegState::ImplicitDefine);
7110     MI.addRegisterKilled(Reg, TRI, true);
7111 } else if (X86::VR128XRegClass.contains(Reg)) {
7112 // Only handle VLX targets.
7113 if (!Subtarget.hasVLX())
7114 return;
7115 // Since vxorps requires AVX512DQ, vpxord should be the best choice.
7116 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), Reg)
7117 .addReg(Reg, RegState::Undef)
7118 .addReg(Reg, RegState::Undef);
7119 MI.addRegisterKilled(Reg, TRI, true);
7120 } else if (X86::VR256XRegClass.contains(Reg) ||
7121 X86::VR512RegClass.contains(Reg)) {
7122 // Only handle VLX targets.
7123 if (!Subtarget.hasVLX())
7124 return;
7125 // Use vpxord to clear the full ymm/zmm register.
7126 // It wants to read and write the xmm sub-register.
7127 Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
7128 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), XReg)
7129 .addReg(XReg, RegState::Undef)
7130 .addReg(XReg, RegState::Undef)
7131         .addReg(Reg, RegState::ImplicitDefine);
7132     MI.addRegisterKilled(Reg, TRI, true);
7133 } else if (X86::GR64RegClass.contains(Reg)) {
7134     // Use XOR32rr because it has a shorter encoding and also zeroes the upper
7135     // bits.
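    // e.g. 'xorl %eax, %eax' also clears the upper 32 bits of %rax, since
    // 32-bit writes zero-extend into the full 64-bit register.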
7136 Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
7137 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
7138 .addReg(XReg, RegState::Undef)
7139 .addReg(XReg, RegState::Undef)
7140         .addReg(Reg, RegState::ImplicitDefine);
7141     MI.addRegisterKilled(Reg, TRI, true);
7142 } else if (X86::GR32RegClass.contains(Reg)) {
7143 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
7144 .addReg(Reg, RegState::Undef)
7145 .addReg(Reg, RegState::Undef);
7146 MI.addRegisterKilled(Reg, TRI, true);
7147 }
7148}
7149
7150 static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
7151                         int PtrOffset = 0) {
7152 unsigned NumAddrOps = MOs.size();
7153
7154 if (NumAddrOps < 4) {
7155     // FrameIndex only - add an immediate offset (whether it's zero or not).
7156 for (unsigned i = 0; i != NumAddrOps; ++i)
7157 MIB.add(MOs[i]);
7158 addOffset(MIB, PtrOffset);
7159 } else {
7160 // General Memory Addressing - we need to add any offset to an existing
7161 // offset.
7162 assert(MOs.size() == 5 && "Unexpected memory operand list length");
7163 for (unsigned i = 0; i != NumAddrOps; ++i) {
7164 const MachineOperand &MO = MOs[i];
7165 if (i == 3 && PtrOffset != 0) {
7166 MIB.addDisp(MO, PtrOffset);
7167 } else {
7168 MIB.add(MO);
7169 }
7170 }
7171 }
7172}
7173
7174 static void updateOperandRegConstraints(MachineFunction &MF,
7175                                         MachineInstr &NewMI,
7176 const TargetInstrInfo &TII) {
7177   MachineRegisterInfo &MRI = MF.getRegInfo();
7178   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
7179
7180 for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
7181 MachineOperand &MO = NewMI.getOperand(Idx);
7182 // We only need to update constraints on virtual register operands.
7183 if (!MO.isReg())
7184 continue;
7185 Register Reg = MO.getReg();
7186 if (!Reg.isVirtual())
7187 continue;
7188
7189 auto *NewRC = MRI.constrainRegClass(
7190 Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
7191 if (!NewRC) {
7192 LLVM_DEBUG(
7193 dbgs() << "WARNING: Unable to update register constraint for operand "
7194 << Idx << " of instruction:\n";
7195 NewMI.dump(); dbgs() << "\n");
7196 }
7197 }
7198}
7199
7200static MachineInstr *fuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
7201                                      ArrayRef<MachineOperand> MOs,
7202                                      MachineBasicBlock::iterator InsertPt,
7203                                      MachineInstr &MI,
7204                                      const TargetInstrInfo &TII) {
7205 // Create the base instruction with the memory operand as the first part.
7206 // Omit the implicit operands, something BuildMI can't do.
7207 MachineInstr *NewMI =
7208 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
7209 MachineInstrBuilder MIB(MF, NewMI);
7210 addOperands(MIB, MOs);
7211
7212 // Loop over the rest of the ri operands, converting them over.
7213 unsigned NumOps = MI.getDesc().getNumOperands() - 2;
7214 for (unsigned i = 0; i != NumOps; ++i) {
7215 MachineOperand &MO = MI.getOperand(i + 2);
7216 MIB.add(MO);
7217 }
7218 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), NumOps + 2))
7219 MIB.add(MO);
7220
7221 updateOperandRegConstraints(MF, *NewMI, TII);
7222
7223 MachineBasicBlock *MBB = InsertPt->getParent();
7224 MBB->insert(InsertPt, NewMI);
7225
7226 return MIB;
7227}
7228
7229static MachineInstr *fuseInst(MachineFunction &MF, unsigned Opcode,
7230 unsigned OpNo, ArrayRef<MachineOperand> MOs,
7233 int PtrOffset = 0) {
7234 // Omit the implicit operands, something BuildMI can't do.
7235 MachineInstr *NewMI =
7236 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
7237 MachineInstrBuilder MIB(MF, NewMI);
7238
7239 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
7240 MachineOperand &MO = MI.getOperand(i);
7241 if (i == OpNo) {
7242 assert(MO.isReg() && "Expected to fold into reg operand!");
7243 addOperands(MIB, MOs, PtrOffset);
7244 } else {
7245 MIB.add(MO);
7246 }
7247 }
7248
7249 updateOperandRegConstraints(MF, *NewMI, TII);
7250
7251 // Copy the NoFPExcept flag from the instruction we're fusing.
7254
7255 MachineBasicBlock *MBB = InsertPt->getParent();
7256 MBB->insert(InsertPt, NewMI);
7257
7258 return MIB;
7259}
7260
7261static MachineInstr *makeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
7264 MachineInstr &MI) {
7265 MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
7266 MI.getDebugLoc(), TII.get(Opcode));
7267 addOperands(MIB, MOs);
7268 return MIB.addImm(0);
7269}
7270
7271MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
7272 MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
7274 unsigned Size, Align Alignment) const {
7275 switch (MI.getOpcode()) {
7276 case X86::INSERTPSrr:
7277 case X86::VINSERTPSrr:
7278 case X86::VINSERTPSZrr:
7279 // Attempt to convert the load of the inserted vector into a folded load
7280 // of a single float.
7281 if (OpNum == 2) {
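// The INSERTPS immediate encodes the source element in bits [7:6], the
// destination element in bits [5:4], and the zero mask in bits [3:0]. When
// folding, the source element is read directly from memory at offset
// SrcIdx * 4, so only the destination index and zero mask are kept below.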
7282 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
7283 unsigned ZMask = Imm & 15;
7284 unsigned DstIdx = (Imm >> 4) & 3;
7285 unsigned SrcIdx = (Imm >> 6) & 3;
7286
7288 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7289 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7290 if ((Size == 0 || Size >= 16) && RCSize >= 16 &&
7291 (MI.getOpcode() != X86::INSERTPSrr || Alignment >= Align(4))) {
7292 int PtrOffset = SrcIdx * 4;
7293 unsigned NewImm = (DstIdx << 4) | ZMask;
7294 unsigned NewOpCode =
7295 (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm
7296 : (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm
7297 : X86::INSERTPSrm;
7298 MachineInstr *NewMI =
7299 fuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
7300 NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
7301 return NewMI;
7302 }
7303 }
7304 break;
7305 case X86::MOVHLPSrr:
7306 case X86::VMOVHLPSrr:
7307 case X86::VMOVHLPSZrr:
7308 // Move the upper 64 bits of the second operand to the lower 64 bits.
7309 // To fold the load, adjust the pointer to the upper half and use (V)MOVLPS.
7310 // TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
7311 if (OpNum == 2) {
7313 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7314 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7315 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
7316 unsigned NewOpCode =
7317 (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm
7318 : (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm
7319 : X86::MOVLPSrm;
7320 MachineInstr *NewMI =
7321 fuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
7322 return NewMI;
7323 }
7324 }
7325 break;
7326 case X86::UNPCKLPDrr:
7327 // If we won't be able to fold this to the memory form of UNPCKL, use
7328 // MOVHPD instead. Done as custom because we can't have this in the load
7329 // table twice.
7330 if (OpNum == 2) {
7332 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7333 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7334 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) {
7335 MachineInstr *NewMI =
7336 fuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
7337 return NewMI;
7338 }
7339 }
7340 break;
7341 case X86::MOV32r0:
7342 if (auto *NewMI =
7343 makeM0Inst(*this, (Size == 4) ? X86::MOV32mi : X86::MOV64mi32, MOs,
7344 InsertPt, MI))
7345 return NewMI;
7346 break;
7347 }
7348
7349 return nullptr;
7350}
7351
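// Return true when MI has an undef register update on operand 1 and that
// operand is either explicitly undef or defined only by an IMPLICIT_DEF;
// folding a load into such an instruction is avoided.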
7352 static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
7353 MachineInstr &MI) {
7354 if (!hasUndefRegUpdate(MI.getOpcode(), 1, /*ForLoadFold*/ true) ||
7355 !MI.getOperand(1).isReg())
7356 return false;
7357
7358 // There are two cases we need to handle, depending on where in the pipeline
7359 // the folding attempt is being made.
7360 // - The register has the undef flag set.
7361 // - The register is produced by the IMPLICIT_DEF instruction.
7362
7363 if (MI.getOperand(1).isUndef())
7364 return true;
7365
7366 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7367 MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
7368 return VRegDef && VRegDef->isImplicitDef();
7369}
7370
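// Try to commute operand Idx1 with another commutable operand so that a
// memory operand can be folded at the commuted position instead. Returns the
// index to fold at after a successful commute, or Idx1 if commuting is not
// possible.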
7371unsigned X86InstrInfo::commuteOperandsForFold(MachineInstr &MI,
7372 unsigned Idx1) const {
7373 unsigned Idx2 = CommuteAnyOperandIndex;
7374 if (!findCommutedOpIndices(MI, Idx1, Idx2))
7375 return Idx1;
7376
7377 bool HasDef = MI.getDesc().getNumDefs();
7378 Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
7379 Register Reg1 = MI.getOperand(Idx1).getReg();
7380 Register Reg2 = MI.getOperand(Idx2).getReg();
7381 bool Tied1 = 0 == MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO);
7382 bool Tied2 = 0 == MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO);
7383
7384 // If either of the commutable operands are tied to the destination
7385 // then we can not commute + fold.
7386 if ((HasDef && Reg0 == Reg1 && Tied1) || (HasDef && Reg0 == Reg2 && Tied2))
7387 return Idx1;
7388
7389 return commuteInstruction(MI, false, Idx1, Idx2) ? Idx2 : Idx1;
7390}
7391
7392static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx) {
7393 if (PrintFailedFusing && !MI.isCopy())
7394 dbgs() << "We failed to fuse operand " << Idx << " in " << MI;
7395}
7396
7397 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
7398 MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
7399 ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
7400 unsigned Size, Align Alignment, bool AllowCommute) const {
7401 bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
7402 unsigned Opc = MI.getOpcode();
7403
7404 // For CPUs that favor the register form of a call or push,
7405 // do not fold loads into calls or pushes, unless optimizing for size
7406 // aggressively.
7407 if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() &&
7408 (Opc == X86::CALL32r || Opc == X86::CALL64r || Opc == X86::PUSH16r ||
7409 Opc == X86::PUSH32r || Opc == X86::PUSH64r))
7410 return nullptr;
7411
7412 // Avoid partial and undef register update stalls unless optimizing for size.
7413 if (!MF.getFunction().hasOptSize() &&
7414 (hasPartialRegUpdate(Opc, Subtarget, /*ForLoadFold*/ true) ||
7416 return nullptr;
7417
7418 unsigned NumOps = MI.getDesc().getNumOperands();
7419 bool IsTwoAddr = NumOps > 1 && OpNum < 2 && MI.getOperand(0).isReg() &&
7420 MI.getOperand(1).isReg() &&
7421 MI.getOperand(0).getReg() == MI.getOperand(1).getReg();
7422
7423 // FIXME: AsmPrinter doesn't know how to handle
7424 // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
7425 if (Opc == X86::ADD32ri &&
7426 MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
7427 return nullptr;
7428
7429 // GOTTPOFF relocation loads can only be folded into add instructions.
7430 // FIXME: Need to exclude other relocations that only support specific
7431 // instructions.
7432 if (MOs.size() == X86::AddrNumOperands &&
7433 MOs[X86::AddrDisp].getTargetFlags() == X86II::MO_GOTTPOFF &&
7434 Opc != X86::ADD64rr)
7435 return nullptr;
7436
7437 // Don't fold loads into indirect calls that need a KCFI check as we'll
7438 // have to unfold these in X86TargetLowering::EmitKCFICheck anyway.
7439 if (MI.isCall() && MI.getCFIType())
7440 return nullptr;
7441
7442 // Attempt to fold any custom cases we have.
7443 if (auto *CustomMI = foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt,
7444 Size, Alignment))
7445 return CustomMI;
7446
7447 // Folding a memory location into the two-address part of a two-address
7448 // instruction is different from folding it elsewhere. It requires
7449 // replacing the *two* registers with the memory location.
7450 //
7451 // Utilize the mapping NonNDD -> RMW for the NDD variant.
7452 unsigned NonNDOpc = Subtarget.hasNDD() ? X86::getNonNDVariant(Opc) : 0U;
7453 const X86FoldTableEntry *I =
7454 IsTwoAddr ? lookupTwoAddrFoldTable(NonNDOpc ? NonNDOpc : Opc)
7455 : lookupFoldTable(Opc, OpNum);
7456
7457 MachineInstr *NewMI = nullptr;
7458 if (I) {
7459 unsigned Opcode = I->DstOp;
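// The fold table stores the minimum memory-operand alignment for the folded
// form as a log2 value in TB_ALIGN_MASK; bail out if the operand is not at
// least that aligned.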
7460 if (Alignment <
7461 Align(1ULL << ((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)))
7462 return nullptr;
7463 bool NarrowToMOV32rm = false;
7464 if (Size) {
7466 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7467 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7468 // Check if it's safe to fold the load. If the size of the object is
7469 // narrower than the load width, then it's not.
7470 // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
7471 if ((I->Flags & TB_FOLDED_LOAD) && Size < RCSize) {
7472 // If this is a 64-bit load, but the spill slot is only 32 bits, then we can
7473 // do a 32-bit load which is implicitly zero-extended. This likely is
7474 // due to live interval analysis remat'ing a load from stack slot.
7475 if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
7476 return nullptr;
7477 if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
7478 return nullptr;
7479 Opcode = X86::MOV32rm;
7480 NarrowToMOV32rm = true;
7481 }
7482 // For stores, make sure the size of the object is equal to the size of
7483 // the store. If the object is larger, the extra bits would be garbage. If
7484 // the object is smaller we might overwrite another object or fault.
7485 if ((I->Flags & TB_FOLDED_STORE) && Size != RCSize)
7486 return nullptr;
7487 }
7488
7489 NewMI = IsTwoAddr ? fuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this)
7490 : fuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
7491
7492 if (NarrowToMOV32rm) {
7493 // This is the special case where we use a MOV32rm to load a 32-bit
7494 // value and zero-extend the top bits. Change the destination register
7495 // to a 32-bit one.
7496 Register DstReg = NewMI->getOperand(0).getReg();
7497 if (DstReg.isPhysical())
7498 NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
7499 else
7500 NewMI->getOperand(0).setSubReg(X86::sub_32bit);
7501 }
7502 return NewMI;
7503 }
7504
7505 if (AllowCommute) {
7506 // If the instruction and target operand are commutable, commute the
7507 // instruction and try again.
7508 unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
7509 if (CommuteOpIdx2 == OpNum) {
7510 printFailMsgforFold(MI, OpNum);
7511 return nullptr;
7512 }
7513 // Attempt to fold with the commuted version of the instruction.
7514 NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, Size,
7515 Alignment, /*AllowCommute=*/false);
7516 if (NewMI)
7517 return NewMI;
7518 // Folding failed again - undo the commute before returning.
7519 commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
7520 }
7521
7522 printFailMsgforFold(MI, OpNum);
7523 return nullptr;
7524}
7525
7526 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
7527 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
7528 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
7529 VirtRegMap *VRM) const {
7530 // Check switch flag
7531 if (NoFusing)
7532 return nullptr;
7533
7534 // Avoid partial and undef register update stalls unless optimizing for size.
7535 if (!MF.getFunction().hasOptSize() &&
7536 (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/ true) ||
7538 return nullptr;
7539
7540 // Don't fold subreg spills, or reloads that use a high subreg.
7541 for (auto Op : Ops) {
7542 MachineOperand &MO = MI.getOperand(Op);
7543 auto SubReg = MO.getSubReg();
7544 // MOV32r0 is special because it's also used to clear a 64-bit register.
7545 // (See patterns for MOV32r0 in TD files).
7546 if (MI.getOpcode() == X86::MOV32r0 && SubReg == X86::sub_32bit)
7547 continue;
7548 if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
7549 return nullptr;
7550 }
7551
7552 const MachineFrameInfo &MFI = MF.getFrameInfo();
7553 unsigned Size = MFI.getObjectSize(FrameIndex);
7554 Align Alignment = MFI.getObjectAlign(FrameIndex);
7555 // If the function stack isn't realigned we don't want to fold instructions
7556 // that need increased alignment.
7557 if (!RI.hasStackRealignment(MF))
7558 Alignment =
7559 std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign());
7560
7561 auto Impl = [&]() {
7562 return foldMemoryOperandImpl(MF, MI, Ops[0],
7563 MachineOperand::CreateFI(FrameIndex), InsertPt,
7564 Size, Alignment, /*AllowCommute=*/true);
7565 };
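// Folding both operands of a TEST r, r (i.e. spilling the tested register)
// is handled by first rewriting it as CMP r, 0 and then folding the reload
// into the register operand of the CMP.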
7566 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
7567 unsigned NewOpc = 0;
7568 unsigned RCSize = 0;
7569 unsigned Opc = MI.getOpcode();
7570 switch (Opc) {
7571 default:
7572 // NDD can be folded into RMW even though its Op0 and Op1 are not tied.
7573 return (Subtarget.hasNDD() ? X86::getNonNDVariant(Opc) : 0U) ? Impl()
7574 : nullptr;
7575 case X86::TEST8rr:
7576 NewOpc = X86::CMP8ri;
7577 RCSize = 1;
7578 break;
7579 case X86::TEST16rr:
7580 NewOpc = X86::CMP16ri;
7581 RCSize = 2;
7582 break;
7583 case X86::TEST32rr:
7584 NewOpc = X86::CMP32ri;
7585 RCSize = 4;
7586 break;
7587 case X86::TEST64rr:
7588 NewOpc = X86::CMP64ri32;
7589 RCSize = 8;
7590 break;
7591 }
7592 // Check if it's safe to fold the load. If the size of the object is
7593 // narrower than the load width, then it's not.
7594 if (Size < RCSize)
7595 return nullptr;
7596 // Change to CMPXXri r, 0 first.
7597 MI.setDesc(get(NewOpc));
7598 MI.getOperand(1).ChangeToImmediate(0);
7599 } else if (Ops.size() != 1)
7600 return nullptr;
7601
7602 return Impl();
7603}
7604
7605/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
7606/// because the latter uses contents that wouldn't be defined in the folded
7607/// version. For instance, this transformation isn't legal:
7608/// movss (%rdi), %xmm0
7609/// addps %xmm0, %xmm0
7610/// ->
7611/// addps (%rdi), %xmm0
7612///
7613/// But this one is:
7614/// movss (%rdi), %xmm0
7615/// addss %xmm0, %xmm0
7616/// ->
7617/// addss (%rdi), %xmm0
7618///
7619 static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
7620 const MachineInstr &UserMI,
7621 const MachineFunction &MF) {
7622 unsigned Opc = LoadMI.getOpcode();
7623 unsigned UserOpc = UserMI.getOpcode();
7625 const TargetRegisterClass *RC =
7626 MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
7627 unsigned RegSize = TRI.getRegSizeInBits(*RC);
7628
7629 if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
7630 Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
7631 Opc == X86::VMOVSSZrm_alt) &&
7632 RegSize > 32) {
7633 // These instructions only load 32 bits, so we can't fold them if the
7634 // destination register is wider than 32 bits (4 bytes) and the user
7635 // instruction isn't a scalar (SS) operation.
7636 switch (UserOpc) {
7637 case X86::CVTSS2SDrr_Int:
7638 case X86::VCVTSS2SDrr_Int:
7639 case X86::VCVTSS2SDZrr_Int:
7640 case X86::VCVTSS2SDZrr_Intk:
7641 case X86::VCVTSS2SDZrr_Intkz:
7642 case X86::CVTSS2SIrr_Int:
7643 case X86::CVTSS2SI64rr_Int:
7644 case X86::VCVTSS2SIrr_Int:
7645 case X86::VCVTSS2SI64rr_Int:
7646 case X86::VCVTSS2SIZrr_Int:
7647 case X86::VCVTSS2SI64Zrr_Int:
7648 case X86::CVTTSS2SIrr_Int:
7649 case X86::CVTTSS2SI64rr_Int:
7650 case X86::VCVTTSS2SIrr_Int:
7651 case X86::VCVTTSS2SI64rr_Int:
7652 case X86::VCVTTSS2SIZrr_Int:
7653 case X86::VCVTTSS2SI64Zrr_Int:
7654 case X86::VCVTSS2USIZrr_Int:
7655 case X86::VCVTSS2USI64Zrr_Int:
7656 case X86::VCVTTSS2USIZrr_Int:
7657 case X86::VCVTTSS2USI64Zrr_Int:
7658 case X86::RCPSSr_Int:
7659 case X86::VRCPSSr_Int:
7660 case X86::RSQRTSSr_Int:
7661 case X86::VRSQRTSSr_Int:
7662 case X86::ROUNDSSri_Int:
7663 case X86::VROUNDSSri_Int:
7664 case X86::COMISSrr_Int:
7665 case X86::VCOMISSrr_Int:
7666 case X86::VCOMISSZrr_Int:
7667 case X86::UCOMISSrr_Int:
7668 case X86::VUCOMISSrr_Int:
7669 case X86::VUCOMISSZrr_Int:
7670 case X86::ADDSSrr_Int:
7671 case X86::VADDSSrr_Int:
7672 case X86::VADDSSZrr_Int:
7673 case X86::CMPSSrri_Int:
7674 case X86::VCMPSSrri_Int:
7675 case X86::VCMPSSZrri_Int:
7676 case X86::DIVSSrr_Int:
7677 case X86::VDIVSSrr_Int:
7678 case X86::VDIVSSZrr_Int:
7679 case X86::MAXSSrr_Int:
7680 case X86::VMAXSSrr_Int:
7681 case X86::VMAXSSZrr_Int:
7682 case X86::MINSSrr_Int:
7683 case X86::VMINSSrr_Int:
7684 case X86::VMINSSZrr_Int:
7685 case X86::MULSSrr_Int:
7686 case X86::VMULSSrr_Int:
7687 case X86::VMULSSZrr_Int:
7688 case X86::SQRTSSr_Int:
7689 case X86::VSQRTSSr_Int:
7690 case X86::VSQRTSSZr_Int:
7691 case X86::SUBSSrr_Int:
7692 case X86::VSUBSSrr_Int:
7693 case X86::VSUBSSZrr_Int:
7694 case X86::VADDSSZrr_Intk:
7695 case X86::VADDSSZrr_Intkz:
7696 case X86::VCMPSSZrri_Intk:
7697 case X86::VDIVSSZrr_Intk:
7698 case X86::VDIVSSZrr_Intkz:
7699 case X86::VMAXSSZrr_Intk:
7700 case X86::VMAXSSZrr_Intkz:
7701 case X86::VMINSSZrr_Intk:
7702 case X86::VMINSSZrr_Intkz:
7703 case X86::VMULSSZrr_Intk:
7704 case X86::VMULSSZrr_Intkz:
7705 case X86::VSQRTSSZr_Intk:
7706 case X86::VSQRTSSZr_Intkz:
7707 case X86::VSUBSSZrr_Intk:
7708 case X86::VSUBSSZrr_Intkz:
7709 case X86::VFMADDSS4rr_Int:
7710 case X86::VFNMADDSS4rr_Int:
7711 case X86::VFMSUBSS4rr_Int:
7712 case X86::VFNMSUBSS4rr_Int:
7713 case X86::VFMADD132SSr_Int:
7714 case X86::VFNMADD132SSr_Int:
7715 case X86::VFMADD213SSr_Int:
7716 case X86::VFNMADD213SSr_Int:
7717 case X86::VFMADD231SSr_Int:
7718 case X86::VFNMADD231SSr_Int:
7719 case X86::VFMSUB132SSr_Int:
7720 case X86::VFNMSUB132SSr_Int:
7721 case X86::VFMSUB213SSr_Int:
7722 case X86::VFNMSUB213SSr_Int:
7723 case X86::VFMSUB231SSr_Int:
7724 case X86::VFNMSUB231SSr_Int:
7725 case X86::VFMADD132SSZr_Int:
7726 case X86::VFNMADD132SSZr_Int:
7727 case X86::VFMADD213SSZr_Int:
7728 case X86::VFNMADD213SSZr_Int:
7729 case X86::VFMADD231SSZr_Int:
7730 case X86::VFNMADD231SSZr_Int:
7731 case X86::VFMSUB132SSZr_Int:
7732 case X86::VFNMSUB132SSZr_Int:
7733 case X86::VFMSUB213SSZr_Int:
7734 case X86::VFNMSUB213SSZr_Int:
7735 case X86::VFMSUB231SSZr_Int:
7736 case X86::VFNMSUB231SSZr_Int:
7737 case X86::VFMADD132SSZr_Intk:
7738 case X86::VFNMADD132SSZr_Intk:
7739 case X86::VFMADD213SSZr_Intk:
7740 case X86::VFNMADD213SSZr_Intk:
7741 case X86::VFMADD231SSZr_Intk:
7742 case X86::VFNMADD231SSZr_Intk:
7743 case X86::VFMSUB132SSZr_Intk:
7744 case X86::VFNMSUB132SSZr_Intk:
7745 case X86::VFMSUB213SSZr_Intk:
7746 case X86::VFNMSUB213SSZr_Intk:
7747 case X86::VFMSUB231SSZr_Intk:
7748 case X86::VFNMSUB231SSZr_Intk:
7749 case X86::VFMADD132SSZr_Intkz:
7750 case X86::VFNMADD132SSZr_Intkz:
7751 case X86::VFMADD213SSZr_Intkz:
7752 case X86::VFNMADD213SSZr_Intkz:
7753 case X86::VFMADD231SSZr_Intkz:
7754 case X86::VFNMADD231SSZr_Intkz:
7755 case X86::VFMSUB132SSZr_Intkz:
7756 case X86::VFNMSUB132SSZr_Intkz:
7757 case X86::VFMSUB213SSZr_Intkz:
7758 case X86::VFNMSUB213SSZr_Intkz:
7759 case X86::VFMSUB231SSZr_Intkz:
7760 case X86::VFNMSUB231SSZr_Intkz:
7761 case X86::VFIXUPIMMSSZrri:
7762 case X86::VFIXUPIMMSSZrrik:
7763 case X86::VFIXUPIMMSSZrrikz:
7764 case X86::VFPCLASSSSZrr:
7765 case X86::VFPCLASSSSZrrk:
7766 case X86::VGETEXPSSZr:
7767 case X86::VGETEXPSSZrk:
7768 case X86::VGETEXPSSZrkz:
7769 case X86::VGETMANTSSZrri:
7770 case X86::VGETMANTSSZrrik:
7771 case X86::VGETMANTSSZrrikz:
7772 case X86::VRANGESSZrri:
7773 case X86::VRANGESSZrrik:
7774 case X86::VRANGESSZrrikz:
7775 case X86::VRCP14SSZrr:
7776 case X86::VRCP14SSZrrk:
7777 case X86::VRCP14SSZrrkz:
7778 case X86::VRCP28SSZr:
7779 case X86::VRCP28SSZrk:
7780 case X86::VRCP28SSZrkz:
7781 case X86::VREDUCESSZrri:
7782 case X86::VREDUCESSZrrik:
7783 case X86::VREDUCESSZrrikz:
7784 case X86::VRNDSCALESSZr_Int:
7785 case X86::VRNDSCALESSZr_Intk:
7786 case X86::VRNDSCALESSZr_Intkz:
7787 case X86::VRSQRT14SSZrr:
7788 case X86::VRSQRT14SSZrrk:
7789 case X86::VRSQRT14SSZrrkz:
7790 case X86::VRSQRT28SSZr:
7791 case X86::VRSQRT28SSZrk:
7792 case X86::VRSQRT28SSZrkz:
7793 case X86::VSCALEFSSZrr:
7794 case X86::VSCALEFSSZrrk:
7795 case X86::VSCALEFSSZrrkz:
7796 return false;
7797 default:
7798 return true;
7799 }
7800 }
7801
7802 if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
7803 Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
7804 Opc == X86::VMOVSDZrm_alt) &&
7805 RegSize > 64) {
7806 // These instructions only load 64 bits, so we can't fold them if the
7807 // destination register is wider than 64 bits (8 bytes) and the user
7808 // instruction isn't a scalar (SD) operation.
7809 switch (UserOpc) {
7810 case X86::CVTSD2SSrr_Int:
7811 case X86::VCVTSD2SSrr_Int:
7812 case X86::VCVTSD2SSZrr_Int:
7813 case X86::VCVTSD2SSZrr_Intk:
7814 case X86::VCVTSD2SSZrr_Intkz:
7815 case X86::CVTSD2SIrr_Int:
7816 case X86::CVTSD2SI64rr_Int:
7817 case X86::VCVTSD2SIrr_Int:
7818 case X86::VCVTSD2SI64rr_Int:
7819 case X86::VCVTSD2SIZrr_Int:
7820 case X86::VCVTSD2SI64Zrr_Int:
7821 case X86::CVTTSD2SIrr_Int:
7822 case X86::CVTTSD2SI64rr_Int:
7823 case X86::VCVTTSD2SIrr_Int:
7824 case X86::VCVTTSD2SI64rr_Int:
7825 case X86::VCVTTSD2SIZrr_Int:
7826 case X86::VCVTTSD2SI64Zrr_Int:
7827 case X86::VCVTSD2USIZrr_Int:
7828 case X86::VCVTSD2USI64Zrr_Int:
7829 case X86::VCVTTSD2USIZrr_Int:
7830 case X86::VCVTTSD2USI64Zrr_Int:
7831 case X86::ROUNDSDri_Int:
7832 case X86::VROUNDSDri_Int:
7833 case X86::COMISDrr_Int:
7834 case X86::VCOMISDrr_Int:
7835 case X86::VCOMISDZrr_Int:
7836 case X86::UCOMISDrr_Int:
7837 case X86::VUCOMISDrr_Int:
7838 case X86::VUCOMISDZrr_Int:
7839 case X86::ADDSDrr_Int:
7840 case X86::VADDSDrr_Int:
7841 case X86::VADDSDZrr_Int:
7842 case X86::CMPSDrri_Int:
7843 case X86::VCMPSDrri_Int:
7844 case X86::VCMPSDZrri_Int:
7845 case X86::DIVSDrr_Int:
7846 case X86::VDIVSDrr_Int:
7847 case X86::VDIVSDZrr_Int:
7848 case X86::MAXSDrr_Int:
7849 case X86::VMAXSDrr_Int:
7850 case X86::VMAXSDZrr_Int:
7851 case X86::MINSDrr_Int:
7852 case X86::VMINSDrr_Int:
7853 case X86::VMINSDZrr_Int:
7854 case X86::MULSDrr_Int:
7855 case X86::VMULSDrr_Int:
7856 case X86::VMULSDZrr_Int:
7857 case X86::SQRTSDr_Int:
7858 case X86::VSQRTSDr_Int:
7859 case X86::VSQRTSDZr_Int:
7860 case X86::SUBSDrr_Int:
7861 case X86::VSUBSDrr_Int:
7862 case X86::VSUBSDZrr_Int:
7863 case X86::VADDSDZrr_Intk:
7864 case X86::VADDSDZrr_Intkz:
7865 case X86::VCMPSDZrri_Intk:
7866 case X86::VDIVSDZrr_Intk:
7867 case X86::VDIVSDZrr_Intkz:
7868 case X86::VMAXSDZrr_Intk:
7869 case X86::VMAXSDZrr_Intkz:
7870 case X86::VMINSDZrr_Intk:
7871 case X86::VMINSDZrr_Intkz:
7872 case X86::VMULSDZrr_Intk:
7873 case X86::VMULSDZrr_Intkz:
7874 case X86::VSQRTSDZr_Intk:
7875 case X86::VSQRTSDZr_Intkz:
7876 case X86::VSUBSDZrr_Intk:
7877 case X86::VSUBSDZrr_Intkz:
7878 case X86::VFMADDSD4rr_Int:
7879 case X86::VFNMADDSD4rr_Int:
7880 case X86::VFMSUBSD4rr_Int:
7881 case X86::VFNMSUBSD4rr_Int:
7882 case X86::VFMADD132SDr_Int:
7883 case X86::VFNMADD132SDr_Int:
7884 case X86::VFMADD213SDr_Int:
7885 case X86::VFNMADD213SDr_Int:
7886 case X86::VFMADD231SDr_Int:
7887 case X86::VFNMADD231SDr_Int:
7888 case X86::VFMSUB132SDr_Int:
7889 case X86::VFNMSUB132SDr_Int:
7890 case X86::VFMSUB213SDr_Int:
7891 case X86::VFNMSUB213SDr_Int:
7892 case X86::VFMSUB231SDr_Int:
7893 case X86::VFNMSUB231SDr_Int:
7894 case X86::VFMADD132SDZr_Int:
7895 case X86::VFNMADD132SDZr_Int:
7896 case X86::VFMADD213SDZr_Int:
7897 case X86::VFNMADD213SDZr_Int:
7898 case X86::VFMADD231SDZr_Int:
7899 case X86::VFNMADD231SDZr_Int:
7900 case X86::VFMSUB132SDZr_Int:
7901 case X86::VFNMSUB132SDZr_Int:
7902 case X86::VFMSUB213SDZr_Int:
7903 case X86::VFNMSUB213SDZr_Int:
7904 case X86::VFMSUB231SDZr_Int:
7905 case X86::VFNMSUB231SDZr_Int:
7906 case X86::VFMADD132SDZr_Intk:
7907 case X86::VFNMADD132SDZr_Intk:
7908 case X86::VFMADD213SDZr_Intk:
7909 case X86::VFNMADD213SDZr_Intk:
7910 case X86::VFMADD231SDZr_Intk:
7911 case X86::VFNMADD231SDZr_Intk:
7912 case X86::VFMSUB132SDZr_Intk:
7913 case X86::VFNMSUB132SDZr_Intk:
7914 case X86::VFMSUB213SDZr_Intk:
7915 case X86::VFNMSUB213SDZr_Intk:
7916 case X86::VFMSUB231SDZr_Intk:
7917 case X86::VFNMSUB231SDZr_Intk:
7918 case X86::VFMADD132SDZr_Intkz:
7919 case X86::VFNMADD132SDZr_Intkz:
7920 case X86::VFMADD213SDZr_Intkz:
7921 case X86::VFNMADD213SDZr_Intkz:
7922 case X86::VFMADD231SDZr_Intkz:
7923 case X86::VFNMADD231SDZr_Intkz:
7924 case X86::VFMSUB132SDZr_Intkz:
7925 case X86::VFNMSUB132SDZr_Intkz:
7926 case X86::VFMSUB213SDZr_Intkz:
7927 case X86::VFNMSUB213SDZr_Intkz:
7928 case X86::VFMSUB231SDZr_Intkz:
7929 case X86::VFNMSUB231SDZr_Intkz:
7930 case X86::VFIXUPIMMSDZrri:
7931 case X86::VFIXUPIMMSDZrrik:
7932 case X86::VFIXUPIMMSDZrrikz:
7933 case X86::VFPCLASSSDZrr:
7934 case X86::VFPCLASSSDZrrk:
7935 case X86::VGETEXPSDZr:
7936 case X86::VGETEXPSDZrk:
7937 case X86::VGETEXPSDZrkz:
7938 case X86::VGETMANTSDZrri:
7939 case X86::VGETMANTSDZrrik:
7940 case X86::VGETMANTSDZrrikz:
7941 case X86::VRANGESDZrri:
7942 case X86::VRANGESDZrrik:
7943 case X86::VRANGESDZrrikz:
7944 case X86::VRCP14SDZrr:
7945 case X86::VRCP14SDZrrk:
7946 case X86::VRCP14SDZrrkz:
7947 case X86::VRCP28SDZr:
7948 case X86::VRCP28SDZrk:
7949 case X86::VRCP28SDZrkz:
7950 case X86::VREDUCESDZrri:
7951 case X86::VREDUCESDZrrik:
7952 case X86::VREDUCESDZrrikz:
7953 case X86::VRNDSCALESDZr_Int:
7954 case X86::VRNDSCALESDZr_Intk:
7955 case X86::VRNDSCALESDZr_Intkz:
7956 case X86::VRSQRT14SDZrr:
7957 case X86::VRSQRT14SDZrrk:
7958 case X86::VRSQRT14SDZrrkz:
7959 case X86::VRSQRT28SDZr:
7960 case X86::VRSQRT28SDZrk:
7961 case X86::VRSQRT28SDZrkz:
7962 case X86::VSCALEFSDZrr:
7963 case X86::VSCALEFSDZrrk:
7964 case X86::VSCALEFSDZrrkz:
7965 return false;
7966 default:
7967 return true;
7968 }
7969 }
7970
7971 if ((Opc == X86::VMOVSHZrm || Opc == X86::VMOVSHZrm_alt) && RegSize > 16) {
7972 // These instructions only load 16 bits, so we can't fold them if the
7973 // destination register is wider than 16 bits (2 bytes) and the user
7974 // instruction isn't a scalar (SH) operation.
7975 switch (UserOpc) {
7976 case X86::VADDSHZrr_Int:
7977 case X86::VCMPSHZrri_Int:
7978 case X86::VDIVSHZrr_Int:
7979 case X86::VMAXSHZrr_Int:
7980 case X86::VMINSHZrr_Int:
7981 case X86::VMULSHZrr_Int:
7982 case X86::VSUBSHZrr_Int:
7983 case X86::VADDSHZrr_Intk:
7984 case X86::VADDSHZrr_Intkz:
7985 case X86::VCMPSHZrri_Intk:
7986 case X86::VDIVSHZrr_Intk:
7987 case X86::VDIVSHZrr_Intkz:
7988 case X86::VMAXSHZrr_Intk:
7989 case X86::VMAXSHZrr_Intkz:
7990 case X86::VMINSHZrr_Intk:
7991 case X86::VMINSHZrr_Intkz:
7992 case X86::VMULSHZrr_Intk:
7993 case X86::VMULSHZrr_Intkz:
7994 case X86::VSUBSHZrr_Intk:
7995 case X86::VSUBSHZrr_Intkz:
7996 case X86::VFMADD132SHZr_Int:
7997 case X86::VFNMADD132SHZr_Int:
7998 case X86::VFMADD213SHZr_Int:
7999 case X86::VFNMADD213SHZr_Int:
8000 case X86::VFMADD231SHZr_Int:
8001 case X86::VFNMADD231SHZr_Int:
8002 case X86::VFMSUB132SHZr_Int:
8003 case X86::VFNMSUB132SHZr_Int:
8004 case X86::VFMSUB213SHZr_Int:
8005 case X86::VFNMSUB213SHZr_Int:
8006 case X86::VFMSUB231SHZr_Int:
8007 case X86::VFNMSUB231SHZr_Int:
8008 case X86::VFMADD132SHZr_Intk:
8009 case X86::VFNMADD132SHZr_Intk:
8010 case X86::VFMADD213SHZr_Intk:
8011 case X86::VFNMADD213SHZr_Intk:
8012 case X86::VFMADD231SHZr_Intk:
8013 case X86::VFNMADD231SHZr_Intk:
8014 case X86::VFMSUB132SHZr_Intk:
8015 case X86::VFNMSUB132SHZr_Intk:
8016 case X86::VFMSUB213SHZr_Intk:
8017 case X86::VFNMSUB213SHZr_Intk:
8018 case X86::VFMSUB231SHZr_Intk:
8019 case X86::VFNMSUB231SHZr_Intk:
8020 case X86::VFMADD132SHZr_Intkz:
8021 case X86::VFNMADD132SHZr_Intkz:
8022 case X86::VFMADD213SHZr_Intkz:
8023 case X86::VFNMADD213SHZr_Intkz:
8024 case X86::VFMADD231SHZr_Intkz:
8025 case X86::VFNMADD231SHZr_Intkz:
8026 case X86::VFMSUB132SHZr_Intkz:
8027 case X86::VFNMSUB132SHZr_Intkz:
8028 case X86::VFMSUB213SHZr_Intkz:
8029 case X86::VFNMSUB213SHZr_Intkz:
8030 case X86::VFMSUB231SHZr_Intkz:
8031 case X86::VFNMSUB231SHZr_Intkz:
8032 return false;
8033 default:
8034 return true;
8035 }
8036 }
8037
8038 return false;
8039}
8040
8041 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
8042 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
8043 MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
8044 LiveIntervals *LIS) const {
8045
8046 // TODO: Support the case where LoadMI loads a wide register, but MI
8047 // only uses a subreg.
8048 for (auto Op : Ops) {
8049 if (MI.getOperand(Op).getSubReg())
8050 return nullptr;
8051 }
8052
8053 // If loading from a FrameIndex, fold directly from the FrameIndex.
8054 unsigned NumOps = LoadMI.getDesc().getNumOperands();
8055 int FrameIndex;
8056 if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
8057 if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
8058 return nullptr;
8059 return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS);
8060 }
8061
8062 // Check switch flag
8063 if (NoFusing)
8064 return nullptr;
8065
8066 // Avoid partial and undef register update stalls unless optimizing for size.
8067 if (!MF.getFunction().hasOptSize() &&
8068 (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/ true) ||
8070 return nullptr;
8071
8072 // Determine the alignment of the load.
8073 Align Alignment;
8074 unsigned LoadOpc = LoadMI.getOpcode();
8075 if (LoadMI.hasOneMemOperand())
8076 Alignment = (*LoadMI.memoperands_begin())->getAlign();
8077 else
8078 switch (LoadOpc) {
8079 case X86::AVX512_512_SET0:
8080 case X86::AVX512_512_SETALLONES:
8081 Alignment = Align(64);
8082 break;
8083 case X86::AVX2_SETALLONES:
8084 case X86::AVX1_SETALLONES:
8085 case X86::AVX_SET0:
8086 case X86::AVX512_256_SET0:
8087 Alignment = Align(32);
8088 break;
8089 case X86::V_SET0:
8090 case X86::V_SETALLONES:
8091 case X86::AVX512_128_SET0:
8092 case X86::FsFLD0F128:
8093 case X86::AVX512_FsFLD0F128:
8094 Alignment = Align(16);
8095 break;
8096 case X86::MMX_SET0:
8097 case X86::FsFLD0SD:
8098 case X86::AVX512_FsFLD0SD:
8099 Alignment = Align(8);
8100 break;
8101 case X86::FsFLD0SS:
8102 case X86::AVX512_FsFLD0SS:
8103 Alignment = Align(4);
8104 break;
8105 case X86::FsFLD0SH:
8106 case X86::AVX512_FsFLD0SH:
8107 Alignment = Align(2);
8108 break;
8109 default:
8110 return nullptr;
8111 }
8112 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
8113 unsigned NewOpc = 0;
8114 switch (MI.getOpcode()) {
8115 default:
8116 return nullptr;
8117 case X86::TEST8rr:
8118 NewOpc = X86::CMP8ri;
8119 break;
8120 case X86::TEST16rr:
8121 NewOpc = X86::CMP16ri;
8122 break;
8123 case X86::TEST32rr:
8124 NewOpc = X86::CMP32ri;
8125 break;
8126 case X86::TEST64rr:
8127 NewOpc = X86::CMP64ri32;
8128 break;
8129 }
8130 // Change to CMPXXri r, 0 first.
8131 MI.setDesc(get(NewOpc));
8132 MI.getOperand(1).ChangeToImmediate(0);
8133 } else if (Ops.size() != 1)
8134 return nullptr;
8135
8136 // Make sure the subregisters match.
8137 // Otherwise we risk changing the size of the load.
8138 if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg())
8139 return nullptr;
8140
8142 switch (LoadOpc) {
8143 case X86::MMX_SET0:
8144 case X86::V_SET0:
8145 case X86::V_SETALLONES:
8146 case X86::AVX2_SETALLONES:
8147 case X86::AVX1_SETALLONES:
8148 case X86::AVX_SET0:
8149 case X86::AVX512_128_SET0:
8150 case X86::AVX512_256_SET0:
8151 case X86::AVX512_512_SET0:
8152 case X86::AVX512_512_SETALLONES:
8153 case X86::FsFLD0SH:
8154 case X86::AVX512_FsFLD0SH:
8155 case X86::FsFLD0SD:
8156 case X86::AVX512_FsFLD0SD:
8157 case X86::FsFLD0SS:
8158 case X86::AVX512_FsFLD0SS:
8159 case X86::FsFLD0F128:
8160 case X86::AVX512_FsFLD0F128: {
8161 // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
8162 // Create a constant-pool entry and operands to load from it.
8163
8164 // Large code model can't fold loads this way.
8165 if (MF.getTarget().getCodeModel() == CodeModel::Large)
8166 return nullptr;
8167
8168 // x86-32 PIC requires a PIC base register for constant pools.
8169 unsigned PICBase = 0;
8170 // Since we're using Small or Kernel code model, we can always use
8171 // RIP-relative addressing for a smaller encoding.
8172 if (Subtarget.is64Bit()) {
8173 PICBase = X86::RIP;
8174 } else if (MF.getTarget().isPositionIndependent()) {
8175 // FIXME: PICBase = getGlobalBaseReg(&MF);
8176 // This doesn't work for several reasons.
8177 // 1. GlobalBaseReg may have been spilled.
8178 // 2. It may not be live at MI.
8179 return nullptr;
8180 }
8181
8182 // Create a constant-pool entry.
8183 MachineConstantPool &MCP = *MF.getConstantPool();
8184 Type *Ty;
8185 bool IsAllOnes = false;
8186 switch (LoadOpc) {
8187 case X86::FsFLD0SS:
8188 case X86::AVX512_FsFLD0SS:
8190 break;
8191 case X86::FsFLD0SD:
8192 case X86::AVX512_FsFLD0SD:
8194 break;
8195 case X86::FsFLD0F128:
8196 case X86::AVX512_FsFLD0F128:
8198 break;
8199 case X86::FsFLD0SH:
8200 case X86::AVX512_FsFLD0SH:
8202 break;
8203 case X86::AVX512_512_SETALLONES:
8204 IsAllOnes = true;
8205 [[fallthrough]];
8206 case X86::AVX512_512_SET0:
8208 16);
8209 break;
8210 case X86::AVX1_SETALLONES:
8211 case X86::AVX2_SETALLONES:
8212 IsAllOnes = true;
8213 [[fallthrough]];
8214 case X86::AVX512_256_SET0:
8215 case X86::AVX_SET0:
8217 8);
8218
8219 break;
8220 case X86::MMX_SET0:
8222 2);
8223 break;
8224 case X86::V_SETALLONES:
8225 IsAllOnes = true;
8226 [[fallthrough]];
8227 case X86::V_SET0:
8228 case X86::AVX512_128_SET0:
8230 4);
8231 break;
8232 }
8233
8234 const Constant *C =
8235 IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty);
8236 unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
8237
8238 // Create operands to load from the constant pool entry.
8239 MOs.push_back(MachineOperand::CreateReg(PICBase, false));
8240 MOs.push_back(MachineOperand::CreateImm(1));
8241 MOs.push_back(MachineOperand::CreateReg(0, false));
8242 MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
8243 MOs.push_back(MachineOperand::CreateReg(0, false));
8244 break;
8245 }
8246 case X86::VPBROADCASTBZ128rm:
8247 case X86::VPBROADCASTBZ256rm:
8248 case X86::VPBROADCASTBZrm:
8249 case X86::VBROADCASTF32X2Z256rm:
8250 case X86::VBROADCASTF32X2Zrm:
8251 case X86::VBROADCASTI32X2Z128rm:
8252 case X86::VBROADCASTI32X2Z256rm:
8253 case X86::VBROADCASTI32X2Zrm:
8254 // No instructions currently fuse with 8-bit or 32-bit x 2 broadcasts.
8255 return nullptr;
8256
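// Helper macro: copy the broadcast load's address operands and attempt to
// fold it as a SIZE-bit element broadcast.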
8257#define FOLD_BROADCAST(SIZE) \
8258 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands, \
8259 LoadMI.operands_begin() + NumOps); \
8260 return foldMemoryBroadcast(MF, MI, Ops[0], MOs, InsertPt, /*Size=*/SIZE, \
8261 /*AllowCommute=*/true);
8262 case X86::VPBROADCASTWZ128rm:
8263 case X86::VPBROADCASTWZ256rm:
8264 case X86::VPBROADCASTWZrm:
8265 FOLD_BROADCAST(16);
8266 case X86::VPBROADCASTDZ128rm:
8267 case X86::VPBROADCASTDZ256rm:
8268 case X86::VPBROADCASTDZrm:
8269 case X86::VBROADCASTSSZ128rm:
8270 case X86::VBROADCASTSSZ256rm:
8271 case X86::VBROADCASTSSZrm:
8272 FOLD_BROADCAST(32);
8273 case X86::VPBROADCASTQZ128rm:
8274 case X86::VPBROADCASTQZ256rm:
8275 case X86::VPBROADCASTQZrm:
8276 case X86::VBROADCASTSDZ256rm:
8277 case X86::VBROADCASTSDZrm:
8278 FOLD_BROADCAST(64);
8279 default: {
8280 if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
8281 return nullptr;
8282
8283 // Folding a normal load. Just copy the load's address operands.
8284 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands,
8285 LoadMI.operands_begin() + NumOps);
8286 break;
8287 }
8288 }
8289 return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
8290 /*Size=*/0, Alignment, /*AllowCommute=*/true);
8291}
8292
8294X86InstrInfo::foldMemoryBroadcast(MachineFunction &MF, MachineInstr &MI,
8295 unsigned OpNum, ArrayRef<MachineOperand> MOs,
8297 unsigned BitsSize, bool AllowCommute) const {
8298
8299 if (auto *I = lookupBroadcastFoldTable(MI.getOpcode(), OpNum))
8300 return matchBroadcastSize(*I, BitsSize)
8301 ? fuseInst(MF, I->DstOp, OpNum, MOs, InsertPt, MI, *this)
8302 : nullptr;
8303
8304 if (AllowCommute) {
8305 // If the instruction and target operand are commutable, commute the
8306 // instruction and try again.
8307 unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
8308 if (CommuteOpIdx2 == OpNum) {
8309 printFailMsgforFold(MI, OpNum);
8310 return nullptr;
8311 }
8312 MachineInstr *NewMI =
8313 foldMemoryBroadcast(MF, MI, CommuteOpIdx2, MOs, InsertPt, BitsSize,
8314 /*AllowCommute=*/false);
8315 if (NewMI)
8316 return NewMI;
8317 // Folding failed again - undo the commute before returning.
8318 commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
8319 }
8320
8321 printFailMsgforFold(MI, OpNum);
8322 return nullptr;
8323}
8324
8325 static SmallVector<MachineMemOperand *, 2>
8326 extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
8327 SmallVector<MachineMemOperand *, 2> LoadMMOs;
8328
8329 for (MachineMemOperand *MMO : MMOs) {
8330 if (!MMO->isLoad())
8331 continue;
8332
8333 if (!MMO->isStore()) {
8334 // Reuse the MMO.
8335 LoadMMOs.push_back(MMO);
8336 } else {
8337 // Clone the MMO and unset the store flag.
8338 LoadMMOs.push_back(MF.getMachineMemOperand(
8339 MMO, MMO->getFlags() & ~MachineMemOperand::MOStore));
8340 }
8341 }
8342
8343 return LoadMMOs;
8344}
8345
8346 static SmallVector<MachineMemOperand *, 2>
8347 extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
8348 SmallVector<MachineMemOperand *, 2> StoreMMOs;
8349
8350 for (MachineMemOperand *MMO : MMOs) {
8351 if (!MMO->isStore())
8352 continue;
8353
8354 if (!MMO->isLoad()) {
8355 // Reuse the MMO.
8356 StoreMMOs.push_back(MMO);
8357 } else {
8358 // Clone the MMO and unset the load flag.
8359 StoreMMOs.push_back(MF.getMachineMemOperand(
8360 MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad));
8361 }
8362 }
8363
8364 return StoreMMOs;
8365}
8366
8367 static unsigned getBroadcastOpcode(const X86FoldTableEntry *I,
8368 const TargetRegisterClass *RC,
8369 const X86Subtarget &STI) {
8370 assert(STI.hasAVX512() && "Expected at least AVX512!");
8371 unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC);
8372 assert((SpillSize == 64 || STI.hasVLX()) &&
8373 "Can't broadcast less than 64 bytes without AVX512VL!");
8374
8375#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64) \
8376 case TYPE: \
8377 switch (SpillSize) { \
8378 default: \
8379 llvm_unreachable("Unknown spill size"); \
8380 case 16: \
8381 return X86::OP16; \
8382 case 32: \
8383 return X86::OP32; \
8384 case 64: \
8385 return X86::OP64; \
8386 } \
8387 break;
8388
8389 switch (I->Flags & TB_BCAST_MASK) {
8390 default:
8391 llvm_unreachable("Unexpected broadcast type!");
8392 CASE_BCAST_TYPE_OPC(TB_BCAST_W, VPBROADCASTWZ128rm, VPBROADCASTWZ256rm,
8393 VPBROADCASTWZrm)
8394 CASE_BCAST_TYPE_OPC(TB_BCAST_D, VPBROADCASTDZ128rm, VPBROADCASTDZ256rm,
8395 VPBROADCASTDZrm)
8396 CASE_BCAST_TYPE_OPC(TB_BCAST_Q, VPBROADCASTQZ128rm, VPBROADCASTQZ256rm,
8397 VPBROADCASTQZrm)
8398 CASE_BCAST_TYPE_OPC(TB_BCAST_SH, VPBROADCASTWZ128rm, VPBROADCASTWZ256rm,
8399 VPBROADCASTWZrm)
8400 CASE_BCAST_TYPE_OPC(TB_BCAST_SS, VBROADCASTSSZ128rm, VBROADCASTSSZ256rm,
8401 VBROADCASTSSZrm)
8402 CASE_BCAST_TYPE_OPC(TB_BCAST_SD, VMOVDDUPZ128rm, VBROADCASTSDZ256rm,
8403 VBROADCASTSDZrm)
8404 }
8405}
8406
8407 bool X86InstrInfo::unfoldMemoryOperand(
8408 MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
8409 bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
8410 const X86FoldTableEntry *I = lookupUnfoldTable(MI.getOpcode());
8411 if (I == nullptr)
8412 return false;
8413 unsigned Opc = I->DstOp;
8414 unsigned Index = I->Flags & TB_INDEX_MASK;
8415 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
8416 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
8417 if (UnfoldLoad && !FoldedLoad)
8418 return false;
8419 UnfoldLoad &= FoldedLoad;
8420 if (UnfoldStore && !FoldedStore)
8421 return false;
8422 UnfoldStore &= FoldedStore;
8423
8424 const MCInstrDesc &MCID = get(Opc);
8425
8426 const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
8428 // TODO: Check if 32-byte or greater accesses are slow too?
8429 if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
8430 Subtarget.isUnalignedMem16Slow())
8431 // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
8432 // conservatively assume the address is unaligned. That's bad for
8433 // performance.
8434 return false;
8439 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
8440 MachineOperand &Op = MI.getOperand(i);
8441 if (i >= Index && i < Index + X86::AddrNumOperands)
8442 AddrOps.push_back(Op);
8443 else if (Op.isReg() && Op.isImplicit())
8444 ImpOps.push_back(Op);
8445 else if (i < Index)
8446 BeforeOps.push_back(Op);
8447 else if (i > Index)
8448 AfterOps.push_back(Op);
8449 }
8450
8451 // Emit the load or broadcast instruction.
8452 if (UnfoldLoad) {
8453 auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
8454
8455 unsigned Opc;
8456 if (I->Flags & TB_BCAST_MASK) {
8457 Opc = getBroadcastOpcode(I, RC, Subtarget);
8458 } else {
8459 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8460 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8461 Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget);
8462 }
8463
8464 DebugLoc DL;
8465 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg);
8466 for (const MachineOperand &AddrOp : AddrOps)
8467 MIB.add(AddrOp);
8468 MIB.setMemRefs(MMOs);
8469 NewMIs.push_back(MIB);
8470
8471 if (UnfoldStore) {
8472 // Address operands cannot be marked isKill.
8473 for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
8474 MachineOperand &MO = NewMIs[0]->getOperand(i);
8475 if (MO.isReg())
8476 MO.setIsKill(false);
8477 }
8478 }
8479 }
8480
8481 // Emit the data processing instruction.
8482 MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI.getDebugLoc(), true);
8483 MachineInstrBuilder MIB(MF, DataMI);
8484
8485 if (FoldedStore)
8486 MIB.addReg(Reg, RegState::Define);
8487 for (MachineOperand &BeforeOp : BeforeOps)
8488 MIB.add(BeforeOp);
8489 if (FoldedLoad)
8490 MIB.addReg(Reg);
8491 for (MachineOperand &AfterOp : AfterOps)
8492 MIB.add(AfterOp);
8493 for (MachineOperand &ImpOp : ImpOps) {
8494 MIB.addReg(ImpOp.getReg(), getDefRegState(ImpOp.isDef()) |
8495 RegState::Implicit |
8496 getKillRegState(ImpOp.isKill()) |
8497 getDeadRegState(ImpOp.isDead()) |
8498 getUndefRegState(ImpOp.isUndef()));
8499 }
8500 // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
8501 switch (DataMI->getOpcode()) {
8502 default:
8503 break;
8504 case X86::CMP64ri32:
8505 case X86::CMP32ri:
8506 case X86::CMP16ri:
8507 case X86::CMP8ri: {
8508 MachineOperand &MO0 = DataMI->getOperand(0);
8509 MachineOperand &MO1 = DataMI->getOperand(1);
8510 if (MO1.isImm() && MO1.getImm() == 0) {
8511 unsigned NewOpc;
8512 switch (DataMI->getOpcode()) {
8513 default:
8514 llvm_unreachable("Unreachable!");
8515 case X86::CMP64ri32:
8516 NewOpc = X86::TEST64rr;
8517 break;
8518 case X86::CMP32ri:
8519 NewOpc = X86::TEST32rr;
8520 break;
8521 case X86::CMP16ri:
8522 NewOpc = X86::TEST16rr;
8523 break;
8524 case X86::CMP8ri:
8525 NewOpc = X86::TEST8rr;
8526 break;
8527 }
8528 DataMI->setDesc(get(NewOpc));
8529 MO1.ChangeToRegister(MO0.getReg(), false);
8530 }
8531 }
8532 }
8533 NewMIs.push_back(DataMI);
8534
8535 // Emit the store instruction.
8536 if (UnfoldStore) {
8537 const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
8538 auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
8539 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
8540 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8541 unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
8542 DebugLoc DL;
8543 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
8544 for (const MachineOperand &AddrOp : AddrOps)
8545 MIB.add(AddrOp);
8546 MIB.addReg(Reg, RegState::Kill);
8547 MIB.setMemRefs(MMOs);
8548 NewMIs.push_back(MIB);
8549 }
8550
8551 return true;
8552}
8553
8554 bool X86InstrInfo::unfoldMemoryOperand(
8555 SelectionDAG &DAG, SDNode *N, SmallVectorImpl<SDNode *> &NewNodes) const {
8556 if (!N->isMachineOpcode())
8557 return false;
8558
8559 const X86FoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode());
8560 if (I == nullptr)
8561 return false;
8562 unsigned Opc = I->DstOp;
8563 unsigned Index = I->Flags & TB_INDEX_MASK;
8564 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
8565 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
8566 const MCInstrDesc &MCID = get(Opc);
8569 const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
8570 unsigned NumDefs = MCID.NumDefs;
8571 std::vector<SDValue> AddrOps;
8572 std::vector<SDValue> BeforeOps;
8573 std::vector<SDValue> AfterOps;
8574 SDLoc dl(N);
8575 unsigned NumOps = N->getNumOperands();
8576 for (unsigned i = 0; i != NumOps - 1; ++i) {
8577 SDValue Op = N->getOperand(i);
8578 if (i >= Index - NumDefs && i < Index - NumDefs + X86::AddrNumOperands)
8579 AddrOps.push_back(Op);
8580 else if (i < Index - NumDefs)
8581 BeforeOps.push_back(Op);
8582 else if (i > Index - NumDefs)
8583 AfterOps.push_back(Op);
8584 }
8585 SDValue Chain = N->getOperand(NumOps - 1);
8586 AddrOps.push_back(Chain);
8587
8588 // Emit the load instruction.
8589 SDNode *Load = nullptr;
8590 if (FoldedLoad) {
8591 EVT VT = *TRI.legalclasstypes_begin(*RC);
8592 auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
8593 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8594 Subtarget.isUnalignedMem16Slow())
8595 // Do not introduce a slow unaligned load.
8596 return false;
8597 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
8598 // memory access is slow above.
8599
8600 unsigned Opc;
8601 if (I->Flags & TB_BCAST_MASK) {
8602 Opc = getBroadcastOpcode(I, RC, Subtarget);
8603 } else {
8604 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8605 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8606 Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget);
8607 }
8608
8609 Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps);
8610 NewNodes.push_back(Load);
8611
8612 // Preserve memory reference information.
8613 DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
8614 }
8615
8616 // Emit the data processing instruction.
8617 std::vector<EVT> VTs;
8618 const TargetRegisterClass *DstRC = nullptr;
8619 if (MCID.getNumDefs() > 0) {
8620 DstRC = getRegClass(MCID, 0, &RI, MF);
8621 VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
8622 }
8623 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
8624 EVT VT = N->getValueType(i);
8625 if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
8626 VTs.push_back(VT);
8627 }
8628 if (Load)
8629 BeforeOps.push_back(SDValue(Load, 0));
8630 llvm::append_range(BeforeOps, AfterOps);
8631 // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
8632 switch (Opc) {
8633 default:
8634 break;
8635 case X86::CMP64ri32:
8636 case X86::CMP32ri:
8637 case X86::CMP16ri:
8638 case X86::CMP8ri:
8639 if (isNullConstant(BeforeOps[1])) {
8640 switch (Opc) {
8641 default:
8642 llvm_unreachable("Unreachable!");
8643 case X86::CMP64ri32:
8644 Opc = X86::TEST64rr;
8645 break;
8646 case X86::CMP32ri:
8647 Opc = X86::TEST32rr;
8648 break;
8649 case X86::CMP16ri:
8650 Opc = X86::TEST16rr;
8651 break;
8652 case X86::CMP8ri:
8653 Opc = X86::TEST8rr;
8654 break;
8655 }
8656 BeforeOps[1] = BeforeOps[0];
8657 }
8658 }
8659 SDNode *NewNode = DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
8660 NewNodes.push_back(NewNode);
8661
8662 // Emit the store instruction.
8663 if (FoldedStore) {
8664 AddrOps.pop_back();
8665 AddrOps.push_back(SDValue(NewNode, 0));
8666 AddrOps.push_back(Chain);
8667 auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
8668 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8669 Subtarget.isUnalignedMem16Slow())
8670 // Do not introduce a slow unaligned store.
8671 return false;
8672 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
8673 // memory access is slow above.
8674 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8675 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8676 SDNode *Store =
8677 DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
8678 dl, MVT::Other, AddrOps);
8679 NewNodes.push_back(Store);
8680
8681 // Preserve memory reference information.
8682 DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
8683 }
8684
8685 return true;
8686}
8687
8688unsigned
8689X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad,
8690 bool UnfoldStore,
8691 unsigned *LoadRegIndex) const {
8692 const X86FoldTableEntry *I = lookupUnfoldTable(Opc);
8693 if (I == nullptr)
8694 return 0;
8695 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
8696 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
8697 if (UnfoldLoad && !FoldedLoad)
8698 return 0;
8699 if (UnfoldStore && !FoldedStore)
8700 return 0;
8701 if (LoadRegIndex)
8702 *LoadRegIndex = I->Flags & TB_INDEX_MASK;
8703 return I->DstOp;
8704}
8705
8706 bool X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
8707 int64_t &Offset1,
8708 int64_t &Offset2) const {
8709 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
8710 return false;
8711
8712 auto IsLoadOpcode = [&](unsigned Opcode) {
8713 switch (Opcode) {
8714 default:
8715 return false;
8716 case X86::MOV8rm:
8717 case X86::MOV16rm:
8718 case X86::MOV32rm:
8719 case X86::MOV64rm:
8720 case X86::LD_Fp32m:
8721 case X86::LD_Fp64m:
8722 case X86::LD_Fp80m:
8723 case X86::MOVSSrm:
8724 case X86::MOVSSrm_alt:
8725 case X86::MOVSDrm:
8726 case X86::MOVSDrm_alt:
8727 case X86::MMX_MOVD64rm:
8728 case X86::MMX_MOVQ64rm:
8729 case X86::MOVAPSrm:
8730 case X86::MOVUPSrm:
8731 case X86::MOVAPDrm:
8732 case X86::MOVUPDrm:
8733 case X86::MOVDQArm:
8734 case X86::MOVDQUrm:
8735 // AVX load instructions
8736 case X86::VMOVSSrm:
8737 case X86::VMOVSSrm_alt:
8738 case X86::VMOVSDrm:
8739 case X86::VMOVSDrm_alt:
8740 case X86::VMOVAPSrm:
8741 case X86::VMOVUPSrm:
8742 case X86::VMOVAPDrm:
8743 case X86::VMOVUPDrm:
8744 case X86::VMOVDQArm:
8745 case X86::VMOVDQUrm:
8746 case X86::VMOVAPSYrm:
8747 case X86::VMOVUPSYrm:
8748 case X86::VMOVAPDYrm:
8749 case X86::VMOVUPDYrm:
8750 case X86::VMOVDQAYrm:
8751 case X86::VMOVDQUYrm:
8752 // AVX512 load instructions
8753 case X86::VMOVSSZrm:
8754 case X86::VMOVSSZrm_alt:
8755 case X86::VMOVSDZrm:
8756 case X86::VMOVSDZrm_alt:
8757 case X86::VMOVAPSZ128rm:
8758 case X86::VMOVUPSZ128rm:
8759 case X86::VMOVAPSZ128rm_NOVLX:
8760 case X86::VMOVUPSZ128rm_NOVLX:
8761 case X86::VMOVAPDZ128rm:
8762 case X86::VMOVUPDZ128rm:
8763 case X86::VMOVDQU8Z128rm:
8764 case X86::VMOVDQU16Z128rm:
8765 case X86::VMOVDQA32Z128rm:
8766 case X86::VMOVDQU32Z128rm:
8767 case X86::VMOVDQA64Z128rm:
8768 case X86::VMOVDQU64Z128rm:
8769 case X86::VMOVAPSZ256rm:
8770 case X86::VMOVUPSZ256rm:
8771 case X86::VMOVAPSZ256rm_NOVLX:
8772 case X86::VMOVUPSZ256rm_NOVLX:
8773 case X86::VMOVAPDZ256rm:
8774 case X86::VMOVUPDZ256rm:
8775 case X86::VMOVDQU8Z256rm:
8776 case X86::VMOVDQU16Z256rm:
8777 case X86::VMOVDQA32Z256rm:
8778 case X86::VMOVDQU32Z256rm:
8779 case X86::VMOVDQA64Z256rm:
8780 case X86::VMOVDQU64Z256rm:
8781 case X86::VMOVAPSZrm:
8782 case X86::VMOVUPSZrm:
8783 case X86::VMOVAPDZrm:
8784 case X86::VMOVUPDZrm:
8785 case X86::VMOVDQU8Zrm:
8786 case X86::VMOVDQU16Zrm:
8787 case X86::VMOVDQA32Zrm:
8788 case X86::VMOVDQU32Zrm:
8789 case X86::VMOVDQA64Zrm:
8790 case X86::VMOVDQU64Zrm:
8791 case X86::KMOVBkm:
8792 case X86::KMOVBkm_EVEX:
8793 case X86::KMOVWkm:
8794 case X86::KMOVWkm_EVEX:
8795 case X86::KMOVDkm:
8796 case X86::KMOVDkm_EVEX:
8797 case X86::KMOVQkm:
8798 case X86::KMOVQkm_EVEX:
8799 return true;
8800 }
8801 };
8802
8803 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
8804 !IsLoadOpcode(Load2->getMachineOpcode()))
8805 return false;
8806
8807 // Lambda to check if both the loads have the same value for an operand index.
8808 auto HasSameOp = [&](int I) {
8809 return Load1->getOperand(I) == Load2->getOperand(I);
8810 };
8811
8812 // All operands except the displacement should match.
8813 if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) ||
8814 !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg))
8815 return false;
8816
8817 // Chain Operand must be the same.
8818 if (!HasSameOp(5))
8819 return false;
8820
8821 // Now let's examine if the displacements are constants.
8822 auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp));
8823 auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp));
8824 if (!Disp1 || !Disp2)
8825 return false;
8826
8827 Offset1 = Disp1->getSExtValue();
8828 Offset2 = Disp2->getSExtValue();
8829 return true;
8830}
8831
8832 bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
8833 int64_t Offset1, int64_t Offset2,
8834 unsigned NumLoads) const {
8835 assert(Offset2 > Offset1);
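// Only cluster loads whose constant displacements are within roughly 512
// bytes of each other.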
8836 if ((Offset2 - Offset1) / 8 > 64)
8837 return false;
8838
8839 unsigned Opc1 = Load1->getMachineOpcode();
8840 unsigned Opc2 = Load2->getMachineOpcode();
8841 if (Opc1 != Opc2)
8842 return false; // FIXME: overly conservative?
8843
8844 switch (Opc1) {
8845 default:
8846 break;
8847 case X86::LD_Fp32m:
8848 case X86::LD_Fp64m:
8849 case X86::LD_Fp80m:
8850 case X86::MMX_MOVD64rm:
8851 case X86::MMX_MOVQ64rm:
8852 return false;
8853 }
8854
8855 EVT VT = Load1->getValueType(0);
8856 switch (VT.getSimpleVT().SimpleTy) {
8857 default:
8858 // XMM registers. In 64-bit mode we can be a bit more aggressive since we
8859 // have 16 of them to play with.
8860 if (Subtarget.is64Bit()) {
8861 if (NumLoads >= 3)
8862 return false;
8863 } else if (NumLoads) {
8864 return false;
8865 }
8866 break;
8867 case MVT::i8:
8868 case MVT::i16:
8869 case MVT::i32:
8870 case MVT::i64:
8871 case MVT::f32:
8872 case MVT::f64:
8873 if (NumLoads)
8874 return false;
8875 break;
8876 }
8877
8878 return true;
8879}
8880
8881 bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
8882 const MachineBasicBlock *MBB,
8883 const MachineFunction &MF) const {
8884
8885 // ENDBR instructions should not be scheduled around.
8886 unsigned Opcode = MI.getOpcode();
8887 if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
8888 Opcode == X86::PLDTILECFGV)
8889 return true;
8890
8891 // Frame setup and destroy can't be scheduled around.
8892 if (MI.getFlag(MachineInstr::FrameSetup) ||
8893 MI.getFlag(MachineInstr::FrameDestroy))
8894 return true;
8895
8897}
8898
8899 bool X86InstrInfo::reverseBranchCondition(
8900 SmallVectorImpl<MachineOperand> &Cond) const {
8901 assert(Cond.size() == 1 && "Invalid X86 branch condition!");
8902 X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
8903 Cond[0].setImm(GetOppositeBranchCondition(CC));
8904 return false;
8905}
8906
8907 bool X86InstrInfo::isSafeToMoveRegClassDefs(
8908 const TargetRegisterClass *RC) const {
8909 // FIXME: Return false for x87 stack register classes for now. We can't
8910 // allow any loads of these registers before FpGet_ST0_80.
8911 return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
8912 RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
8913 RC == &X86::RFP80RegClass);
8914}
8915
8916 /// Return a virtual register initialized with the
8917 /// global base register value. Output instructions required to
8918/// initialize the register in the function entry block, if necessary.
8919///
8920/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
8921///
8922 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
8923 X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
8924 Register GlobalBaseReg = X86FI->getGlobalBaseReg();
8925 if (GlobalBaseReg != 0)
8926 return GlobalBaseReg;
8927
8928 // Create the register. The code to initialize it is inserted
8929 // later, by the CGBR pass (below).
8930 MachineRegisterInfo &RegInfo = MF->getRegInfo();
8931 GlobalBaseReg = RegInfo.createVirtualRegister(
8932 Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass);
8933 X86FI->setGlobalBaseReg(GlobalBaseReg);
8934 return GlobalBaseReg;
8935}
8936
8937// FIXME: Some shuffle and unpack instructions have equivalents in different
8938// domains, but they require a bit more work than just switching opcodes.
8939
8940static const uint16_t *lookup(unsigned opcode, unsigned domain,
8941 ArrayRef<uint16_t[3]> Table) {
8942 for (const uint16_t(&Row)[3] : Table)
8943 if (Row[domain - 1] == opcode)
8944 return Row;
8945 return nullptr;
8946}
8947
8948static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
8949 ArrayRef<uint16_t[4]> Table) {
8950 // If this is the integer domain make sure to check both integer columns.
8951 for (const uint16_t(&Row)[4] : Table)
8952 if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode))
8953 return Row;
8954 return nullptr;
8955}
8956
8957// Helper to attempt to widen/narrow blend masks.
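// For example, narrowing the 4-lane mask 0b0011 to 2 lanes yields 0b01 and
// widening the 2-lane mask 0b01 to 4 lanes yields 0b0011, while narrowing
// 0b0010 to 2 lanes fails because the low pair is only partially set.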
8958static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth,
8959 unsigned NewWidth, unsigned *pNewMask = nullptr) {
8960 assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
8961 "Illegal blend mask scale");
8962 unsigned NewMask = 0;
8963
8964 if ((OldWidth % NewWidth) == 0) {
8965 unsigned Scale = OldWidth / NewWidth;
8966 unsigned SubMask = (1u << Scale) - 1;
8967 for (unsigned i = 0; i != NewWidth; ++i) {
8968 unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
8969 if (Sub == SubMask)
8970 NewMask |= (1u << i);
8971 else if (Sub != 0x0)
8972 return false;
8973 }
8974 } else {
8975 unsigned Scale = NewWidth / OldWidth;
8976 unsigned SubMask = (1u << Scale) - 1;
8977 for (unsigned i = 0; i != OldWidth; ++i) {
8978 if (OldMask & (1 << i)) {
8979 NewMask |= (SubMask << (i * Scale));
8980 }
8981 }
8982 }
8983
8984 if (pNewMask)
8985 *pNewMask = NewMask;
8986 return true;
8987}
8988
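// Returns a bitmask of the execution domains this instruction could be
// converted to: 0x2 = PackedSingle, 0x4 = PackedDouble, 0x8 = PackedInt.
// A return value of 0 means no custom domain reassignment applies.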
8989 uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
8990 unsigned Opcode = MI.getOpcode();
8991 unsigned NumOperands = MI.getDesc().getNumOperands();
8992
8993 auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
8994 uint16_t validDomains = 0;
8995 if (MI.getOperand(NumOperands - 1).isImm()) {
8996 unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
8997 if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4))
8998 validDomains |= 0x2; // PackedSingle
8999 if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2))
9000 validDomains |= 0x4; // PackedDouble
9001 if (!Is256 || Subtarget.hasAVX2())
9002 validDomains |= 0x8; // PackedInt
9003 }
9004 return validDomains;
9005 };
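// The mask built above uses the domain numbering found throughout this file:
// bit 1 (0x2) = PackedSingle, bit 2 (0x4) = PackedDouble, bit 3 (0x8) =
// PackedInt, so a result of 0xe means the blend can be rewritten into any of
// the three domains.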
9006
9007 switch (Opcode) {
9008 case X86::BLENDPDrmi:
9009 case X86::BLENDPDrri:
9010 case X86::VBLENDPDrmi:
9011 case X86::VBLENDPDrri:
9012 return GetBlendDomains(2, false);
9013 case X86::VBLENDPDYrmi:
9014 case X86::VBLENDPDYrri:
9015 return GetBlendDomains(4, true);
9016 case X86::BLENDPSrmi:
9017 case X86::BLENDPSrri:
9018 case X86::VBLENDPSrmi:
9019 case X86::VBLENDPSrri:
9020 case X86::VPBLENDDrmi:
9021 case X86::VPBLENDDrri:
9022 return GetBlendDomains(4, false);
9023 case X86::VBLENDPSYrmi:
9024 case X86::VBLENDPSYrri:
9025 case X86::VPBLENDDYrmi:
9026 case X86::VPBLENDDYrri:
9027 return GetBlendDomains(8, true);
9028 case X86::PBLENDWrmi:
9029 case X86::PBLENDWrri:
9030 case X86::VPBLENDWrmi:
9031 case X86::VPBLENDWrri:
9032 // Treat VPBLENDWY as a 128-bit vector as it repeats the lo/hi masks.
9033 case X86::VPBLENDWYrmi:
9034 case X86::VPBLENDWYrri:
9035 return GetBlendDomains(8, false);
9036 case X86::VPANDDZ128rr:
9037 case X86::VPANDDZ128rm:
9038 case X86::VPANDDZ256rr:
9039 case X86::VPANDDZ256rm:
9040 case X86::VPANDQZ128rr:
9041 case X86::VPANDQZ128rm:
9042 case X86::VPANDQZ256rr:
9043 case X86::VPANDQZ256rm:
9044 case X86::VPANDNDZ128rr:
9045 case X86::VPANDNDZ128rm:
9046 case X86::VPANDNDZ256rr:
9047 case X86::VPANDNDZ256rm:
9048 case X86::VPANDNQZ128rr:
9049 case X86::VPANDNQZ128rm:
9050 case X86::VPANDNQZ256rr:
9051 case X86::VPANDNQZ256rm:
9052 case X86::VPORDZ128rr:
9053 case X86::VPORDZ128rm:
9054 case X86::VPORDZ256rr:
9055 case X86::VPORDZ256rm:
9056 case X86::VPORQZ128rr:
9057 case X86::VPORQZ128rm:
9058 case X86::VPORQZ256rr:
9059 case X86::VPORQZ256rm:
9060 case X86::VPXORDZ128rr:
9061 case X86::VPXORDZ128rm:
9062 case X86::VPXORDZ256rr:
9063 case X86::VPXORDZ256rm:
9064 case X86::VPXORQZ128rr:
9065 case X86::VPXORQZ128rm:
9066 case X86::VPXORQZ256rr:
9067 case X86::VPXORQZ256rm:
9068 // If we don't have DQI, see if we can still switch from an EVEX integer
9069 // instruction to a VEX floating point instruction.
9070 if (Subtarget.hasDQI())
9071 return 0;
9072
9073 if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
9074 return 0;
9075 if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
9076 return 0;
9077 // Register forms will have 3 operands. Memory form will have more.
9078 if (NumOperands == 3 &&
9079 RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
9080 return 0;
9081
9082 // All domains are valid.
9083 return 0xe;
9084 case X86::MOVHLPSrr:
9085 // We can swap domains when both inputs are the same register.
9086 // FIXME: This doesn't catch all the cases we would like. If the input
9087 // register isn't KILLed by the instruction, the two address instruction
9088 // pass puts a COPY on one input. The other input uses the original
9089 // register. This prevents the same physical register from being used by
9090 // both inputs.
9091 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
9092 MI.getOperand(0).getSubReg() == 0 &&
9093 MI.getOperand(1).getSubReg() == 0 && MI.getOperand(2).getSubReg() == 0)
9094 return 0x6;
9095 return 0;
9096 case X86::SHUFPDrri:
9097 return 0x6;
9098 }
9099 return 0;
9100}
9101
9102#include "X86ReplaceableInstrs.def"
9103
9105 unsigned Domain) const {
9106 assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
9107 uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
9108 assert(dom && "Not an SSE instruction");
9109
9110 unsigned Opcode = MI.getOpcode();
9111 unsigned NumOperands = MI.getDesc().getNumOperands();
9112
9113 auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
9114 if (MI.getOperand(NumOperands - 1).isImm()) {
9115 unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
9116 Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
9117 unsigned NewImm = Imm;
9118
9119 const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
9120 if (!table)
9121 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
9122
9123 if (Domain == 1) { // PackedSingle
9124 AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
9125 } else if (Domain == 2) { // PackedDouble
9126 AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
9127 } else if (Domain == 3) { // PackedInt
9128 if (Subtarget.hasAVX2()) {
9129 // If we are already VPBLENDW use that, else use VPBLENDD.
9130 if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
9131 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
9132 AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
9133 }
9134 } else {
9135 assert(!Is256 && "128-bit vector expected");
9136 AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
9137 }
9138 }
9139
9140 assert(table && table[Domain - 1] && "Unknown domain op");
9141 MI.setDesc(get(table[Domain - 1]));
9142 MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
9143 }
9144 return true;
9145 };
9146
9147 switch (Opcode) {
9148 case X86::BLENDPDrmi:
9149 case X86::BLENDPDrri:
9150 case X86::VBLENDPDrmi:
9151 case X86::VBLENDPDrri:
9152 return SetBlendDomain(2, false);
9153 case X86::VBLENDPDYrmi:
9154 case X86::VBLENDPDYrri:
9155 return SetBlendDomain(4, true);
9156 case X86::BLENDPSrmi:
9157 case X86::BLENDPSrri:
9158 case X86::VBLENDPSrmi:
9159 case X86::VBLENDPSrri:
9160 case X86::VPBLENDDrmi:
9161 case X86::VPBLENDDrri:
9162 return SetBlendDomain(4, false);
9163 case X86::VBLENDPSYrmi:
9164 case X86::VBLENDPSYrri:
9165 case X86::VPBLENDDYrmi:
9166 case X86::VPBLENDDYrri:
9167 return SetBlendDomain(8, true);
9168 case X86::PBLENDWrmi:
9169 case X86::PBLENDWrri:
9170 case X86::VPBLENDWrmi:
9171 case X86::VPBLENDWrri:
9172 return SetBlendDomain(8, false);
9173 case X86::VPBLENDWYrmi:
9174 case X86::VPBLENDWYrri:
9175 return SetBlendDomain(16, true);
9176 case X86::VPANDDZ128rr:
9177 case X86::VPANDDZ128rm:
9178 case X86::VPANDDZ256rr:
9179 case X86::VPANDDZ256rm:
9180 case X86::VPANDQZ128rr:
9181 case X86::VPANDQZ128rm:
9182 case X86::VPANDQZ256rr:
9183 case X86::VPANDQZ256rm:
9184 case X86::VPANDNDZ128rr:
9185 case X86::VPANDNDZ128rm:
9186 case X86::VPANDNDZ256rr:
9187 case X86::VPANDNDZ256rm:
9188 case X86::VPANDNQZ128rr:
9189 case X86::VPANDNQZ128rm:
9190 case X86::VPANDNQZ256rr:
9191 case X86::VPANDNQZ256rm:
9192 case X86::VPORDZ128rr:
9193 case X86::VPORDZ128rm:
9194 case X86::VPORDZ256rr:
9195 case X86::VPORDZ256rm:
9196 case X86::VPORQZ128rr:
9197 case X86::VPORQZ128rm:
9198 case X86::VPORQZ256rr:
9199 case X86::VPORQZ256rm:
9200 case X86::VPXORDZ128rr:
9201 case X86::VPXORDZ128rm:
9202 case X86::VPXORDZ256rr:
9203 case X86::VPXORDZ256rm:
9204 case X86::VPXORQZ128rr:
9205 case X86::VPXORQZ128rm:
9206 case X86::VPXORQZ256rr:
9207 case X86::VPXORQZ256rm: {
9208 // Without DQI, convert EVEX instructions to VEX instructions.
9209 if (Subtarget.hasDQI())
9210 return false;
9211
9212 const uint16_t *table =
9213 lookupAVX512(MI.getOpcode(), dom, ReplaceableCustomAVX512LogicInstrs);
9214 assert(table && "Instruction not found in table?");
9215 // Don't change integer Q instructions to D instructions and
9216 // use D instructions if we started with a PS instruction.
9217 if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9218 Domain = 4;
9219 MI.setDesc(get(table[Domain - 1]));
9220 return true;
9221 }
9222 case X86::UNPCKHPDrr:
9223 case X86::MOVHLPSrr:
9224 // We just need to commute the instruction which will switch the domains.
9225 if (Domain != dom && Domain != 3 &&
9226 MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
9227 MI.getOperand(0).getSubReg() == 0 &&
9228 MI.getOperand(1).getSubReg() == 0 &&
9229 MI.getOperand(2).getSubReg() == 0) {
9230 commuteInstruction(MI, false);
9231 return true;
9232 }
9233 // We must always return true for MOVHLPSrr.
9234 if (Opcode == X86::MOVHLPSrr)
9235 return true;
9236 break;
9237 case X86::SHUFPDrri: {
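// Sketch of the immediate mapping (annotation): SHUFPS selects four 32-bit
// elements via four 2-bit fields. 0x44 (0b01'00'01'00) picks
// {src1[0], src1[1], src2[0], src2[1]}, i.e. the low double of each source.
// OR-ing in 0x0a retargets the low pair to {src1[2], src1[3]} (src1's high
// double) when SHUFPD's bit 0 is set, and OR-ing in 0xa0 does the same for
// src2 when bit 1 is set.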
9238 if (Domain == 1) {
9239 unsigned Imm = MI.getOperand(3).getImm();
9240 unsigned NewImm = 0x44;
9241 if (Imm & 1)
9242 NewImm |= 0x0a;
9243 if (Imm & 2)
9244 NewImm |= 0xa0;
9245 MI.getOperand(3).setImm(NewImm);
9246 MI.setDesc(get(X86::SHUFPSrri));
9247 }
9248 return true;
9249 }
9250 }
9251 return false;
9252}
9253
9254std::pair<uint16_t, uint16_t>
9256 uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
9257 unsigned opcode = MI.getOpcode();
9258 uint16_t validDomains = 0;
9259 if (domain) {
9260 // Attempt to match for custom instructions.
9261 validDomains = getExecutionDomainCustom(MI);
9262 if (validDomains)
9263 return std::make_pair(domain, validDomains);
9264
9265 if (lookup(opcode, domain, ReplaceableInstrs)) {
9266 validDomains = 0xe;
9267 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
9268 validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
9269 } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
9270 validDomains = 0x6;
9271 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
9272 // Insert/extract instructions should only affect the domain if AVX2
9273 // is enabled.
9274 if (!Subtarget.hasAVX2())
9275 return std::make_pair(0, 0);
9276 validDomains = 0xe;
9277 } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
9278 validDomains = 0xe;
9279 } else if (Subtarget.hasDQI() &&
9280 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
9281 validDomains = 0xe;
9282 } else if (Subtarget.hasDQI()) {
9283 if (const uint16_t *table =
9284 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQMasked)) {
9285 if (domain == 1 || (domain == 3 && table[3] == opcode))
9286 validDomains = 0xa;
9287 else
9288 validDomains = 0xc;
9289 }
9290 }
9291 }
9292 return std::make_pair(domain, validDomains);
9293}
9294
9296 assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
9297 uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
9298 assert(dom && "Not an SSE instruction");
9299
9300 // Attempt to match for custom instructions.
9302 return;
9303
9304 const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
9305 if (!table) { // try the other table
9306 assert((Subtarget.hasAVX2() || Domain < 3) &&
9307 "256-bit vector operations only available in AVX2");
9308 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
9309 }
9310 if (!table) { // try the FP table
9311 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
9312 assert((!table || Domain < 3) &&
9313 "Can only select PackedSingle or PackedDouble");
9314 }
9315 if (!table) { // try the other table
9316 assert(Subtarget.hasAVX2() &&
9317 "256-bit insert/extract only available in AVX2");
9318 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
9319 }
9320 if (!table) { // try the AVX512 table
9321 assert(Subtarget.hasAVX512() && "Requires AVX-512");
9322 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
9323 // Don't change integer Q instructions to D instructions.
9324 if (table && Domain == 3 && table[3] == MI.getOpcode())
9325 Domain = 4;
9326 }
9327 if (!table) { // try the AVX512DQ table
9328 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9329 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
9330 // Don't change integer Q instructions to D instructions and
9331 // use D instructions if we started with a PS instruction.
9332 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9333 Domain = 4;
9334 }
9335 if (!table) { // try the AVX512DQMasked table
9336 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9337 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
9338 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9339 Domain = 4;
9340 }
9341 assert(table && "Cannot change domain");
9342 MI.setDesc(get(table[Domain - 1]));
9343}
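// Rough usage sketch (annotation): the ExecutionDomainFix pass in lib/CodeGen
// queries getExecutionDomain() for the set of legal domains of an instruction,
// picks one that matches the surrounding code to avoid cross-domain bypass
// delays, and then calls setExecutionDomain() to rewrite the opcode in place.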
9344
9347 DebugLoc DL;
9348 BuildMI(MBB, MI, DL, get(X86::NOOP));
9349}
9350
9351/// Return the noop instruction to use for a noop.
9353 MCInst Nop;
9354 Nop.setOpcode(X86::NOOP);
9355 return Nop;
9356}
9357
9359 switch (opc) {
9360 default:
9361 return false;
9362 case X86::DIVPDrm:
9363 case X86::DIVPDrr:
9364 case X86::DIVPSrm:
9365 case X86::DIVPSrr:
9366 case X86::DIVSDrm:
9367 case X86::DIVSDrm_Int:
9368 case X86::DIVSDrr:
9369 case X86::DIVSDrr_Int:
9370 case X86::DIVSSrm:
9371 case X86::DIVSSrm_Int:
9372 case X86::DIVSSrr:
9373 case X86::DIVSSrr_Int:
9374 case X86::SQRTPDm:
9375 case X86::SQRTPDr:
9376 case X86::SQRTPSm:
9377 case X86::SQRTPSr:
9378 case X86::SQRTSDm:
9379 case X86::SQRTSDm_Int:
9380 case X86::SQRTSDr:
9381 case X86::SQRTSDr_Int:
9382 case X86::SQRTSSm:
9383 case X86::SQRTSSm_Int:
9384 case X86::SQRTSSr:
9385 case X86::SQRTSSr_Int:
9386 // AVX instructions with high latency
9387 case X86::VDIVPDrm:
9388 case X86::VDIVPDrr:
9389 case X86::VDIVPDYrm:
9390 case X86::VDIVPDYrr:
9391 case X86::VDIVPSrm:
9392 case X86::VDIVPSrr:
9393 case X86::VDIVPSYrm:
9394 case X86::VDIVPSYrr:
9395 case X86::VDIVSDrm:
9396 case X86::VDIVSDrm_Int:
9397 case X86::VDIVSDrr:
9398 case X86::VDIVSDrr_Int:
9399 case X86::VDIVSSrm:
9400 case X86::VDIVSSrm_Int:
9401 case X86::VDIVSSrr:
9402 case X86::VDIVSSrr_Int:
9403 case X86::VSQRTPDm:
9404 case X86::VSQRTPDr:
9405 case X86::VSQRTPDYm:
9406 case X86::VSQRTPDYr:
9407 case X86::VSQRTPSm:
9408 case X86::VSQRTPSr:
9409 case X86::VSQRTPSYm:
9410 case X86::VSQRTPSYr:
9411 case X86::VSQRTSDm:
9412 case X86::VSQRTSDm_Int:
9413 case X86::VSQRTSDr:
9414 case X86::VSQRTSDr_Int:
9415 case X86::VSQRTSSm:
9416 case X86::VSQRTSSm_Int:
9417 case X86::VSQRTSSr:
9418 case X86::VSQRTSSr_Int:
9419 // AVX512 instructions with high latency
9420 case X86::VDIVPDZ128rm:
9421 case X86::VDIVPDZ128rmb:
9422 case X86::VDIVPDZ128rmbk:
9423 case X86::VDIVPDZ128rmbkz:
9424 case X86::VDIVPDZ128rmk:
9425 case X86::VDIVPDZ128rmkz:
9426 case X86::VDIVPDZ128rr:
9427 case X86::VDIVPDZ128rrk:
9428 case X86::VDIVPDZ128rrkz:
9429 case X86::VDIVPDZ256rm:
9430 case X86::VDIVPDZ256rmb:
9431 case X86::VDIVPDZ256rmbk:
9432 case X86::VDIVPDZ256rmbkz:
9433 case X86::VDIVPDZ256rmk:
9434 case X86::VDIVPDZ256rmkz:
9435 case X86::VDIVPDZ256rr:
9436 case X86::VDIVPDZ256rrk:
9437 case X86::VDIVPDZ256rrkz:
9438 case X86::VDIVPDZrrb:
9439 case X86::VDIVPDZrrbk:
9440 case X86::VDIVPDZrrbkz:
9441 case X86::VDIVPDZrm:
9442 case X86::VDIVPDZrmb:
9443 case X86::VDIVPDZrmbk:
9444 case X86::VDIVPDZrmbkz:
9445 case X86::VDIVPDZrmk:
9446 case X86::VDIVPDZrmkz:
9447 case X86::VDIVPDZrr:
9448 case X86::VDIVPDZrrk:
9449 case X86::VDIVPDZrrkz:
9450 case X86::VDIVPSZ128rm:
9451 case X86::VDIVPSZ128rmb:
9452 case X86::VDIVPSZ128rmbk:
9453 case X86::VDIVPSZ128rmbkz:
9454 case X86::VDIVPSZ128rmk:
9455 case X86::VDIVPSZ128rmkz:
9456 case X86::VDIVPSZ128rr:
9457 case X86::VDIVPSZ128rrk:
9458 case X86::VDIVPSZ128rrkz:
9459 case X86::VDIVPSZ256rm:
9460 case X86::VDIVPSZ256rmb:
9461 case X86::VDIVPSZ256rmbk:
9462 case X86::VDIVPSZ256rmbkz:
9463 case X86::VDIVPSZ256rmk:
9464 case X86::VDIVPSZ256rmkz:
9465 case X86::VDIVPSZ256rr:
9466 case X86::VDIVPSZ256rrk:
9467 case X86::VDIVPSZ256rrkz:
9468 case X86::VDIVPSZrrb:
9469 case X86::VDIVPSZrrbk:
9470 case X86::VDIVPSZrrbkz:
9471 case X86::VDIVPSZrm:
9472 case X86::VDIVPSZrmb:
9473 case X86::VDIVPSZrmbk:
9474 case X86::VDIVPSZrmbkz:
9475 case X86::VDIVPSZrmk:
9476 case X86::VDIVPSZrmkz:
9477 case X86::VDIVPSZrr:
9478 case X86::VDIVPSZrrk:
9479 case X86::VDIVPSZrrkz:
9480 case X86::VDIVSDZrm:
9481 case X86::VDIVSDZrr:
9482 case X86::VDIVSDZrm_Int:
9483 case X86::VDIVSDZrm_Intk:
9484 case X86::VDIVSDZrm_Intkz:
9485 case X86::VDIVSDZrr_Int:
9486 case X86::VDIVSDZrr_Intk:
9487 case X86::VDIVSDZrr_Intkz:
9488 case X86::VDIVSDZrrb_Int:
9489 case X86::VDIVSDZrrb_Intk:
9490 case X86::VDIVSDZrrb_Intkz:
9491 case X86::VDIVSSZrm:
9492 case X86::VDIVSSZrr:
9493 case X86::VDIVSSZrm_Int:
9494 case X86::VDIVSSZrm_Intk:
9495 case X86::VDIVSSZrm_Intkz:
9496 case X86::VDIVSSZrr_Int:
9497 case X86::VDIVSSZrr_Intk:
9498 case X86::VDIVSSZrr_Intkz:
9499 case X86::VDIVSSZrrb_Int:
9500 case X86::VDIVSSZrrb_Intk:
9501 case X86::VDIVSSZrrb_Intkz:
9502 case X86::VSQRTPDZ128m:
9503 case X86::VSQRTPDZ128mb:
9504 case X86::VSQRTPDZ128mbk:
9505 case X86::VSQRTPDZ128mbkz:
9506 case X86::VSQRTPDZ128mk:
9507 case X86::VSQRTPDZ128mkz:
9508 case X86::VSQRTPDZ128r:
9509 case X86::VSQRTPDZ128rk:
9510 case X86::VSQRTPDZ128rkz:
9511 case X86::VSQRTPDZ256m:
9512 case X86::VSQRTPDZ256mb:
9513 case X86::VSQRTPDZ256mbk:
9514 case X86::VSQRTPDZ256mbkz:
9515 case X86::VSQRTPDZ256mk:
9516 case X86::VSQRTPDZ256mkz:
9517 case X86::VSQRTPDZ256r:
9518 case X86::VSQRTPDZ256rk:
9519 case X86::VSQRTPDZ256rkz:
9520 case X86::VSQRTPDZm:
9521 case X86::VSQRTPDZmb:
9522 case X86::VSQRTPDZmbk:
9523 case X86::VSQRTPDZmbkz:
9524 case X86::VSQRTPDZmk:
9525 case X86::VSQRTPDZmkz:
9526 case X86::VSQRTPDZr:
9527 case X86::VSQRTPDZrb:
9528 case X86::VSQRTPDZrbk:
9529 case X86::VSQRTPDZrbkz:
9530 case X86::VSQRTPDZrk:
9531 case X86::VSQRTPDZrkz:
9532 case X86::VSQRTPSZ128m:
9533 case X86::VSQRTPSZ128mb:
9534 case X86::VSQRTPSZ128mbk:
9535 case X86::VSQRTPSZ128mbkz:
9536 case X86::VSQRTPSZ128mk:
9537 case X86::VSQRTPSZ128mkz:
9538 case X86::VSQRTPSZ128r:
9539 case X86::VSQRTPSZ128rk:
9540 case X86::VSQRTPSZ128rkz:
9541 case X86::VSQRTPSZ256m:
9542 case X86::VSQRTPSZ256mb:
9543 case X86::VSQRTPSZ256mbk:
9544 case X86::VSQRTPSZ256mbkz:
9545 case X86::VSQRTPSZ256mk:
9546 case X86::VSQRTPSZ256mkz:
9547 case X86::VSQRTPSZ256r:
9548 case X86::VSQRTPSZ256rk:
9549 case X86::VSQRTPSZ256rkz:
9550 case X86::VSQRTPSZm:
9551 case X86::VSQRTPSZmb:
9552 case X86::VSQRTPSZmbk:
9553 case X86::VSQRTPSZmbkz:
9554 case X86::VSQRTPSZmk:
9555 case X86::VSQRTPSZmkz:
9556 case X86::VSQRTPSZr:
9557 case X86::VSQRTPSZrb:
9558 case X86::VSQRTPSZrbk:
9559 case X86::VSQRTPSZrbkz:
9560 case X86::VSQRTPSZrk:
9561 case X86::VSQRTPSZrkz:
9562 case X86::VSQRTSDZm:
9563 case X86::VSQRTSDZm_Int:
9564 case X86::VSQRTSDZm_Intk:
9565 case X86::VSQRTSDZm_Intkz:
9566 case X86::VSQRTSDZr:
9567 case X86::VSQRTSDZr_Int:
9568 case X86::VSQRTSDZr_Intk:
9569 case X86::VSQRTSDZr_Intkz:
9570 case X86::VSQRTSDZrb_Int:
9571 case X86::VSQRTSDZrb_Intk:
9572 case X86::VSQRTSDZrb_Intkz:
9573 case X86::VSQRTSSZm:
9574 case X86::VSQRTSSZm_Int:
9575 case X86::VSQRTSSZm_Intk:
9576 case X86::VSQRTSSZm_Intkz:
9577 case X86::VSQRTSSZr:
9578 case X86::VSQRTSSZr_Int:
9579 case X86::VSQRTSSZr_Intk:
9580 case X86::VSQRTSSZr_Intkz:
9581 case X86::VSQRTSSZrb_Int:
9582 case X86::VSQRTSSZrb_Intk:
9583 case X86::VSQRTSSZrb_Intkz:
9584
9585 case X86::VGATHERDPDYrm:
9586 case X86::VGATHERDPDZ128rm:
9587 case X86::VGATHERDPDZ256rm:
9588 case X86::VGATHERDPDZrm:
9589 case X86::VGATHERDPDrm:
9590 case X86::VGATHERDPSYrm:
9591 case X86::VGATHERDPSZ128rm:
9592 case X86::VGATHERDPSZ256rm:
9593 case X86::VGATHERDPSZrm:
9594 case X86::VGATHERDPSrm:
9595 case X86::VGATHERPF0DPDm:
9596 case X86::VGATHERPF0DPSm:
9597 case X86::VGATHERPF0QPDm:
9598 case X86::VGATHERPF0QPSm:
9599 case X86::VGATHERPF1DPDm:
9600 case X86::VGATHERPF1DPSm:
9601 case X86::VGATHERPF1QPDm:
9602 case X86::VGATHERPF1QPSm:
9603 case X86::VGATHERQPDYrm:
9604 case X86::VGATHERQPDZ128rm:
9605 case X86::VGATHERQPDZ256rm:
9606 case X86::VGATHERQPDZrm:
9607 case X86::VGATHERQPDrm:
9608 case X86::VGATHERQPSYrm:
9609 case X86::VGATHERQPSZ128rm:
9610 case X86::VGATHERQPSZ256rm:
9611 case X86::VGATHERQPSZrm:
9612 case X86::VGATHERQPSrm:
9613 case X86::VPGATHERDDYrm:
9614 case X86::VPGATHERDDZ128rm:
9615 case X86::VPGATHERDDZ256rm:
9616 case X86::VPGATHERDDZrm:
9617 case X86::VPGATHERDDrm:
9618 case X86::VPGATHERDQYrm:
9619 case X86::VPGATHERDQZ128rm:
9620 case X86::VPGATHERDQZ256rm:
9621 case X86::VPGATHERDQZrm:
9622 case X86::VPGATHERDQrm:
9623 case X86::VPGATHERQDYrm:
9624 case X86::VPGATHERQDZ128rm:
9625 case X86::VPGATHERQDZ256rm:
9626 case X86::VPGATHERQDZrm:
9627 case X86::VPGATHERQDrm:
9628 case X86::VPGATHERQQYrm:
9629 case X86::VPGATHERQQZ128rm:
9630 case X86::VPGATHERQQZ256rm:
9631 case X86::VPGATHERQQZrm:
9632 case X86::VPGATHERQQrm:
9633 case X86::VSCATTERDPDZ128mr:
9634 case X86::VSCATTERDPDZ256mr:
9635 case X86::VSCATTERDPDZmr:
9636 case X86::VSCATTERDPSZ128mr:
9637 case X86::VSCATTERDPSZ256mr:
9638 case X86::VSCATTERDPSZmr:
9639 case X86::VSCATTERPF0DPDm:
9640 case X86::VSCATTERPF0DPSm:
9641 case X86::VSCATTERPF0QPDm:
9642 case X86::VSCATTERPF0QPSm:
9643 case X86::VSCATTERPF1DPDm:
9644 case X86::VSCATTERPF1DPSm:
9645 case X86::VSCATTERPF1QPDm:
9646 case X86::VSCATTERPF1QPSm:
9647 case X86::VSCATTERQPDZ128mr:
9648 case X86::VSCATTERQPDZ256mr:
9649 case X86::VSCATTERQPDZmr:
9650 case X86::VSCATTERQPSZ128mr:
9651 case X86::VSCATTERQPSZ256mr:
9652 case X86::VSCATTERQPSZmr:
9653 case X86::VPSCATTERDDZ128mr:
9654 case X86::VPSCATTERDDZ256mr:
9655 case X86::VPSCATTERDDZmr:
9656 case X86::VPSCATTERDQZ128mr:
9657 case X86::VPSCATTERDQZ256mr:
9658 case X86::VPSCATTERDQZmr:
9659 case X86::VPSCATTERQDZ128mr:
9660 case X86::VPSCATTERQDZ256mr:
9661 case X86::VPSCATTERQDZmr:
9662 case X86::VPSCATTERQQZ128mr:
9663 case X86::VPSCATTERQQZ256mr:
9664 case X86::VPSCATTERQQZmr:
9665 return true;
9666 }
9667}
9668
9670 const MachineRegisterInfo *MRI,
9671 const MachineInstr &DefMI,
9672 unsigned DefIdx,
9673 const MachineInstr &UseMI,
9674 unsigned UseIdx) const {
9675 return isHighLatencyDef(DefMI.getOpcode());
9676}
9677
9679 const MachineBasicBlock *MBB) const {
9680 assert(Inst.getNumExplicitOperands() == 3 && Inst.getNumExplicitDefs() == 1 &&
9681 Inst.getNumDefs() <= 2 && "Reassociation needs binary operators");
9682
9683 // Integer binary math/logic instructions have a third source operand:
9684 // the EFLAGS register. That operand must be both defined here and never
9685 // used; i.e., it must be dead. If the EFLAGS operand is live, then we
9686 // cannot change anything because rearranging the operands could affect other
9687 // instructions that depend on the exact status flags (zero, sign, etc.)
9688 // that are set by using these particular operands with this operation.
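// For example (annotation): rewriting t = ADD32rr(ADD32rr(a, b), c) as
// ADD32rr(ADD32rr(a, c), b) produces the same sum, but the carry/overflow
// flags set by each ADD can differ, so the transform is only legal when every
// EFLAGS def involved is dead.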
9689 const MachineOperand *FlagDef =
9690 Inst.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
9691 assert((Inst.getNumDefs() == 1 || FlagDef) && "Implicit def isn't flags?");
9692 if (FlagDef && !FlagDef->isDead())
9693 return false;
9694
9695 return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
9696}
9697
9698// TODO: There are many more machine instruction opcodes to match:
9699// 1. Other data types (integer, vectors)
9700// 2. Other math / logic operations (xor, or)
9701// 3. Other forms of the same operation (intrinsics and other variants)
9703 bool Invert) const {
9704 if (Invert)
9705 return false;
9706 switch (Inst.getOpcode()) {
9707 CASE_ND(ADD8rr)
9708 CASE_ND(ADD16rr)
9709 CASE_ND(ADD32rr)
9710 CASE_ND(ADD64rr)
9711 CASE_ND(AND8rr)
9712 CASE_ND(AND16rr)
9713 CASE_ND(AND32rr)
9714 CASE_ND(AND64rr)
9715 CASE_ND(OR8rr)
9716 CASE_ND(OR16rr)
9717 CASE_ND(OR32rr)
9718 CASE_ND(OR64rr)
9719 CASE_ND(XOR8rr)
9720 CASE_ND(XOR16rr)
9721 CASE_ND(XOR32rr)
9722 CASE_ND(XOR64rr)
9723 CASE_ND(IMUL16rr)
9724 CASE_ND(IMUL32rr)
9725 CASE_ND(IMUL64rr)
9726 case X86::PANDrr:
9727 case X86::PORrr:
9728 case X86::PXORrr:
9729 case X86::ANDPDrr:
9730 case X86::ANDPSrr:
9731 case X86::ORPDrr:
9732 case X86::ORPSrr:
9733 case X86::XORPDrr:
9734 case X86::XORPSrr:
9735 case X86::PADDBrr:
9736 case X86::PADDWrr:
9737 case X86::PADDDrr:
9738 case X86::PADDQrr:
9739 case X86::PMULLWrr:
9740 case X86::PMULLDrr:
9741 case X86::PMAXSBrr:
9742 case X86::PMAXSDrr:
9743 case X86::PMAXSWrr:
9744 case X86::PMAXUBrr:
9745 case X86::PMAXUDrr:
9746 case X86::PMAXUWrr:
9747 case X86::PMINSBrr:
9748 case X86::PMINSDrr:
9749 case X86::PMINSWrr:
9750 case X86::PMINUBrr:
9751 case X86::PMINUDrr:
9752 case X86::PMINUWrr:
9753 case X86::VPANDrr:
9754 case X86::VPANDYrr:
9755 case X86::VPANDDZ128rr:
9756 case X86::VPANDDZ256rr:
9757 case X86::VPANDDZrr:
9758 case X86::VPANDQZ128rr:
9759 case X86::VPANDQZ256rr:
9760 case X86::VPANDQZrr:
9761 case X86::VPORrr:
9762 case X86::VPORYrr:
9763 case X86::VPORDZ128rr:
9764 case X86::VPORDZ256rr:
9765 case X86::VPORDZrr:
9766 case X86::VPORQZ128rr:
9767 case X86::VPORQZ256rr:
9768 case X86::VPORQZrr:
9769 case X86::VPXORrr:
9770 case X86::VPXORYrr:
9771 case X86::VPXORDZ128rr:
9772 case X86::VPXORDZ256rr:
9773 case X86::VPXORDZrr:
9774 case X86::VPXORQZ128rr:
9775 case X86::VPXORQZ256rr:
9776 case X86::VPXORQZrr:
9777 case X86::VANDPDrr:
9778 case X86::VANDPSrr:
9779 case X86::VANDPDYrr:
9780 case X86::VANDPSYrr:
9781 case X86::VANDPDZ128rr:
9782 case X86::VANDPSZ128rr:
9783 case X86::VANDPDZ256rr:
9784 case X86::VANDPSZ256rr:
9785 case X86::VANDPDZrr:
9786 case X86::VANDPSZrr:
9787 case X86::VORPDrr:
9788 case X86::VORPSrr:
9789 case X86::VORPDYrr:
9790 case X86::VORPSYrr:
9791 case X86::VORPDZ128rr:
9792 case X86::VORPSZ128rr:
9793 case X86::VORPDZ256rr:
9794 case X86::VORPSZ256rr:
9795 case X86::VORPDZrr:
9796 case X86::VORPSZrr:
9797 case X86::VXORPDrr:
9798 case X86::VXORPSrr:
9799 case X86::VXORPDYrr:
9800 case X86::VXORPSYrr:
9801 case X86::VXORPDZ128rr:
9802 case X86::VXORPSZ128rr:
9803 case X86::VXORPDZ256rr:
9804 case X86::VXORPSZ256rr:
9805 case X86::VXORPDZrr:
9806 case X86::VXORPSZrr:
9807 case X86::KADDBrr:
9808 case X86::KADDWrr:
9809 case X86::KADDDrr:
9810 case X86::KADDQrr:
9811 case X86::KANDBrr:
9812 case X86::KANDWrr:
9813 case X86::KANDDrr:
9814 case X86::KANDQrr:
9815 case X86::KORBrr:
9816 case X86::KORWrr:
9817 case X86::KORDrr:
9818 case X86::KORQrr:
9819 case X86::KXORBrr:
9820 case X86::KXORWrr:
9821 case X86::KXORDrr:
9822 case X86::KXORQrr:
9823 case X86::VPADDBrr:
9824 case X86::VPADDWrr:
9825 case X86::VPADDDrr:
9826 case X86::VPADDQrr:
9827 case X86::VPADDBYrr:
9828 case X86::VPADDWYrr:
9829 case X86::VPADDDYrr:
9830 case X86::VPADDQYrr:
9831 case X86::VPADDBZ128rr:
9832 case X86::VPADDWZ128rr:
9833 case X86::VPADDDZ128rr:
9834 case X86::VPADDQZ128rr:
9835 case X86::VPADDBZ256rr:
9836 case X86::VPADDWZ256rr:
9837 case X86::VPADDDZ256rr:
9838 case X86::VPADDQZ256rr:
9839 case X86::VPADDBZrr:
9840 case X86::VPADDWZrr:
9841 case X86::VPADDDZrr:
9842 case X86::VPADDQZrr:
9843 case X86::VPMULLWrr:
9844 case X86::VPMULLWYrr:
9845 case X86::VPMULLWZ128rr:
9846 case X86::VPMULLWZ256rr:
9847 case X86::VPMULLWZrr:
9848 case X86::VPMULLDrr:
9849 case X86::VPMULLDYrr:
9850 case X86::VPMULLDZ128rr:
9851 case X86::VPMULLDZ256rr:
9852 case X86::VPMULLDZrr:
9853 case X86::VPMULLQZ128rr:
9854 case X86::VPMULLQZ256rr:
9855 case X86::VPMULLQZrr:
9856 case X86::VPMAXSBrr:
9857 case X86::VPMAXSBYrr:
9858 case X86::VPMAXSBZ128rr:
9859 case X86::VPMAXSBZ256rr:
9860 case X86::VPMAXSBZrr:
9861 case X86::VPMAXSDrr:
9862 case X86::VPMAXSDYrr:
9863 case X86::VPMAXSDZ128rr:
9864 case X86::VPMAXSDZ256rr:
9865 case X86::VPMAXSDZrr:
9866 case X86::VPMAXSQZ128rr:
9867 case X86::VPMAXSQZ256rr:
9868 case X86::VPMAXSQZrr:
9869 case X86::VPMAXSWrr:
9870 case X86::VPMAXSWYrr:
9871 case X86::VPMAXSWZ128rr:
9872 case X86::VPMAXSWZ256rr:
9873 case X86::VPMAXSWZrr:
9874 case X86::VPMAXUBrr:
9875 case X86::VPMAXUBYrr:
9876 case X86::VPMAXUBZ128rr:
9877 case X86::VPMAXUBZ256rr:
9878 case X86::VPMAXUBZrr:
9879 case X86::VPMAXUDrr:
9880 case X86::VPMAXUDYrr:
9881 case X86::VPMAXUDZ128rr:
9882 case X86::VPMAXUDZ256rr:
9883 case X86::VPMAXUDZrr:
9884 case X86::VPMAXUQZ128rr:
9885 case X86::VPMAXUQZ256rr:
9886 case X86::VPMAXUQZrr:
9887 case X86::VPMAXUWrr:
9888 case X86::VPMAXUWYrr:
9889 case X86::VPMAXUWZ128rr:
9890 case X86::VPMAXUWZ256rr:
9891 case X86::VPMAXUWZrr:
9892 case X86::VPMINSBrr:
9893 case X86::VPMINSBYrr:
9894 case X86::VPMINSBZ128rr:
9895 case X86::VPMINSBZ256rr:
9896 case X86::VPMINSBZrr:
9897 case X86::VPMINSDrr:
9898 case X86::VPMINSDYrr:
9899 case X86::VPMINSDZ128rr:
9900 case X86::VPMINSDZ256rr:
9901 case X86::VPMINSDZrr:
9902 case X86::VPMINSQZ128rr:
9903 case X86::VPMINSQZ256rr:
9904 case X86::VPMINSQZrr:
9905 case X86::VPMINSWrr:
9906 case X86::VPMINSWYrr:
9907 case X86::VPMINSWZ128rr:
9908 case X86::VPMINSWZ256rr:
9909 case X86::VPMINSWZrr:
9910 case X86::VPMINUBrr:
9911 case X86::VPMINUBYrr:
9912 case X86::VPMINUBZ128rr:
9913 case X86::VPMINUBZ256rr:
9914 case X86::VPMINUBZrr:
9915 case X86::VPMINUDrr:
9916 case X86::VPMINUDYrr:
9917 case X86::VPMINUDZ128rr:
9918 case X86::VPMINUDZ256rr:
9919 case X86::VPMINUDZrr:
9920 case X86::VPMINUQZ128rr:
9921 case X86::VPMINUQZ256rr:
9922 case X86::VPMINUQZrr:
9923 case X86::VPMINUWrr:
9924 case X86::VPMINUWYrr:
9925 case X86::VPMINUWZ128rr:
9926 case X86::VPMINUWZ256rr:
9927 case X86::VPMINUWZrr:
9928 // Normal min/max instructions are not commutative because of NaN and signed
9929 // zero semantics, but these are. Thus, there's no need to check for global
9930 // relaxed math; the instructions themselves have the properties we need.
9931 case X86::MAXCPDrr:
9932 case X86::MAXCPSrr:
9933 case X86::MAXCSDrr:
9934 case X86::MAXCSSrr:
9935 case X86::MINCPDrr:
9936 case X86::MINCPSrr:
9937 case X86::MINCSDrr:
9938 case X86::MINCSSrr:
9939 case X86::VMAXCPDrr:
9940 case X86::VMAXCPSrr:
9941 case X86::VMAXCPDYrr:
9942 case X86::VMAXCPSYrr:
9943 case X86::VMAXCPDZ128rr:
9944 case X86::VMAXCPSZ128rr:
9945 case X86::VMAXCPDZ256rr:
9946 case X86::VMAXCPSZ256rr:
9947 case X86::VMAXCPDZrr:
9948 case X86::VMAXCPSZrr:
9949 case X86::VMAXCSDrr:
9950 case X86::VMAXCSSrr:
9951 case X86::VMAXCSDZrr:
9952 case X86::VMAXCSSZrr:
9953 case X86::VMINCPDrr:
9954 case X86::VMINCPSrr:
9955 case X86::VMINCPDYrr:
9956 case X86::VMINCPSYrr:
9957 case X86::VMINCPDZ128rr:
9958 case X86::VMINCPSZ128rr:
9959 case X86::VMINCPDZ256rr:
9960 case X86::VMINCPSZ256rr:
9961 case X86::VMINCPDZrr:
9962 case X86::VMINCPSZrr:
9963 case X86::VMINCSDrr:
9964 case X86::VMINCSSrr:
9965 case X86::VMINCSDZrr:
9966 case X86::VMINCSSZrr:
9967 case X86::VMAXCPHZ128rr:
9968 case X86::VMAXCPHZ256rr:
9969 case X86::VMAXCPHZrr:
9970 case X86::VMAXCSHZrr:
9971 case X86::VMINCPHZ128rr:
9972 case X86::VMINCPHZ256rr:
9973 case X86::VMINCPHZrr:
9974 case X86::VMINCSHZrr:
9975 return true;
9976 case X86::ADDPDrr:
9977 case X86::ADDPSrr:
9978 case X86::ADDSDrr:
9979 case X86::ADDSSrr:
9980 case X86::MULPDrr:
9981 case X86::MULPSrr:
9982 case X86::MULSDrr:
9983 case X86::MULSSrr:
9984 case X86::VADDPDrr:
9985 case X86::VADDPSrr:
9986 case X86::VADDPDYrr:
9987 case X86::VADDPSYrr:
9988 case X86::VADDPDZ128rr:
9989 case X86::VADDPSZ128rr:
9990 case X86::VADDPDZ256rr:
9991 case X86::VADDPSZ256rr:
9992 case X86::VADDPDZrr:
9993 case X86::VADDPSZrr:
9994 case X86::VADDSDrr:
9995 case X86::VADDSSrr:
9996 case X86::VADDSDZrr:
9997 case X86::VADDSSZrr:
9998 case X86::VMULPDrr:
9999 case X86::VMULPSrr:
10000 case X86::VMULPDYrr:
10001 case X86::VMULPSYrr:
10002 case X86::VMULPDZ128rr:
10003 case X86::VMULPSZ128rr:
10004 case X86::VMULPDZ256rr:
10005 case X86::VMULPSZ256rr:
10006 case X86::VMULPDZrr:
10007 case X86::VMULPSZrr:
10008 case X86::VMULSDrr:
10009 case X86::VMULSSrr:
10010 case X86::VMULSDZrr:
10011 case X86::VMULSSZrr:
10012 case X86::VADDPHZ128rr:
10013 case X86::VADDPHZ256rr:
10014 case X86::VADDPHZrr:
10015 case X86::VADDSHZrr:
10016 case X86::VMULPHZ128rr:
10017 case X86::VMULPHZ256rr:
10018 case X86::VMULPHZrr:
10019 case X86::VMULSHZrr:
10022 default:
10023 return false;
10024 }
10025}
10026
10027/// If \p DescribedReg overlaps with the MOVrr instruction's destination
10028/// register then, if possible, describe the value in terms of the source
10029/// register.
10030static std::optional<ParamLoadedValue>
10032 const TargetRegisterInfo *TRI) {
10033 Register DestReg = MI.getOperand(0).getReg();
10034 Register SrcReg = MI.getOperand(1).getReg();
10035
10036 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10037
10038 // If the described register is the destination, just return the source.
10039 if (DestReg == DescribedReg)
10040 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10041
10042 // If the described register is a sub-register of the destination register,
10043 // then pick out the source register's corresponding sub-register.
10044 if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) {
10045 Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
10046 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10047 }
10048
10049 // The remaining case to consider is when the described register is a
10050 // super-register of the destination register. MOV8rr and MOV16rr do not
10051 // write to any of the other bytes in the register, meaning that we'd have to
10052 // describe the value using a combination of the source register and the
10053 // non-overlapping bits in the described register, which is not currently
10054 // possible.
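// For example (annotation): if the instruction is $eax = MOV32rr $ebx and the
// described register is $rax, the value can still be reported as $ebx, because
// a 32-bit MOV implicitly zeroes bits 32-63 of its destination.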
10055 if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr ||
10056 !TRI->isSuperRegister(DestReg, DescribedReg))
10057 return std::nullopt;
10058
10059 assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case");
10060 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10061}
10062
10063std::optional<ParamLoadedValue>
10065 const MachineOperand *Op = nullptr;
10066 DIExpression *Expr = nullptr;
10067
10069
10070 switch (MI.getOpcode()) {
10071 case X86::LEA32r:
10072 case X86::LEA64r:
10073 case X86::LEA64_32r: {
10074 // We may need to describe a 64-bit parameter with a 32-bit LEA.
10075 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10076 return std::nullopt;
10077
10078 // Operand 4 could be global address. For now we do not support
10079 // such situation.
10080 if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
10081 return std::nullopt;
10082
10083 const MachineOperand &Op1 = MI.getOperand(1);
10084 const MachineOperand &Op2 = MI.getOperand(3);
10085 assert(Op2.isReg() &&
10086 (Op2.getReg() == X86::NoRegister || Op2.getReg().isPhysical()));
10087
10088 // Omit situations like:
10089 // %rsi = lea %rsi, 4, ...
10090 if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) ||
10091 Op2.getReg() == MI.getOperand(0).getReg())
10092 return std::nullopt;
10093 else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister &&
10094 TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) ||
10095 (Op2.getReg() != X86::NoRegister &&
10096 TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg())))
10097 return std::nullopt;
10098
10099 int64_t Coef = MI.getOperand(2).getImm();
10100 int64_t Offset = MI.getOperand(4).getImm();
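// Rough example (annotation): for $rdi = LEA64r $rbx, 2, $rcx, 8, $noreg the
// loaded value is reported as the base operand $rbx with a DWARF expression
// along the lines of { DW_OP_breg (rcx) 0, DW_OP_constu 2, DW_OP_mul,
// DW_OP_plus, DW_OP_plus_uconst 8 }, i.e. rbx + rcx * 2 + 8.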
10101 SmallVector<uint64_t, 8> Ops;
10102
10103 if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) {
10104 Op = &Op1;
10105 } else if (Op1.isFI())
10106 Op = &Op1;
10107
10108 if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) {
10109 Ops.push_back(dwarf::DW_OP_constu);
10110 Ops.push_back(Coef + 1);
10111 Ops.push_back(dwarf::DW_OP_mul);
10112 } else {
10113 if (Op && Op2.getReg() != X86::NoRegister) {
10114 int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false);
10115 if (dwarfReg < 0)
10116 return std::nullopt;
10117 else if (dwarfReg < 32) {
10118 Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg);
10119 Ops.push_back(0);
10120 } else {
10121 Ops.push_back(dwarf::DW_OP_bregx);
10122 Ops.push_back(dwarfReg);
10123 Ops.push_back(0);
10124 }
10125 } else if (!Op) {
10126 assert(Op2.getReg() != X86::NoRegister);
10127 Op = &Op2;
10128 }
10129
10130 if (Coef > 1) {
10131 assert(Op2.getReg() != X86::NoRegister);
10132 Ops.push_back(dwarf::DW_OP_constu);
10133 Ops.push_back(Coef);
10134 Ops.push_back(dwarf::DW_OP_mul);
10135 }
10136
10137 if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) &&
10138 Op2.getReg() != X86::NoRegister) {
10139 Ops.push_back(dwarf::DW_OP_plus);
10140 }
10141 }
10142
10143 DIExpression::appendOffset(Ops, Offset);
10144 Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops);
10145
10146 return ParamLoadedValue(*Op, Expr);
10147 }
10148 case X86::MOV8ri:
10149 case X86::MOV16ri:
10150 // TODO: Handle MOV8ri and MOV16ri.
10151 return std::nullopt;
10152 case X86::MOV32ri:
10153 case X86::MOV64ri:
10154 case X86::MOV64ri32:
10155 // MOV32ri may be used for producing zero-extended 32-bit immediates in
10156 // 64-bit parameters, so we need to consider super-registers.
10157 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10158 return std::nullopt;
10159 return ParamLoadedValue(MI.getOperand(1), Expr);
10160 case X86::MOV8rr:
10161 case X86::MOV16rr:
10162 case X86::MOV32rr:
10163 case X86::MOV64rr:
10164 return describeMOVrrLoadedValue(MI, Reg, TRI);
10165 case X86::XOR32rr: {
10166 // 64-bit parameters are zero-materialized using XOR32rr, so also consider
10167 // super-registers.
10168 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10169 return std::nullopt;
10170 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
10171 return ParamLoadedValue(MachineOperand::CreateImm(0), Expr);
10172 return std::nullopt;
10173 }
10174 case X86::MOVSX64rr32: {
10175 // We may need to describe the lower 32 bits of the MOVSX; for example, in
10176 // cases like this:
10177 //
10178 // $ebx = [...]
10179 // $rdi = MOVSX64rr32 $ebx
10180 // $esi = MOV32rr $edi
10181 if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg))
10182 return std::nullopt;
10183
10184 Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10185
10186 // If the described register is the destination register we need to
10187 // sign-extend the source register from 32 bits. The other case we handle
10188 // is when the described register is the 32-bit sub-register of the
10189 // destination register, in which case we just need to return the source
10190 // register.
10191 if (Reg == MI.getOperand(0).getReg())
10192 Expr = DIExpression::appendExt(Expr, 32, 64, true);
10193 else
10194 assert(X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg) &&
10195 "Unhandled sub-register case for MOVSX64rr32");
10196
10197 return ParamLoadedValue(MI.getOperand(1), Expr);
10198 }
10199 default:
10200 assert(!MI.isMoveImmediate() && "Unexpected MoveImm instruction");
10202 }
10203}
10204
10205/// This is an architecture-specific helper function of reassociateOps.
10206/// Set special operand attributes for new instructions after reassociation.
10208 MachineInstr &OldMI2,
10209 MachineInstr &NewMI1,
10210 MachineInstr &NewMI2) const {
10211 // Integer instructions may define an implicit EFLAGS dest register operand.
10212 MachineOperand *OldFlagDef1 =
10213 OldMI1.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10214 MachineOperand *OldFlagDef2 =
10215 OldMI2.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10216
10217 assert(!OldFlagDef1 == !OldFlagDef2 &&
10218 "Unexpected instruction type for reassociation");
10219
10220 if (!OldFlagDef1 || !OldFlagDef2)
10221 return;
10222
10223 assert(OldFlagDef1->isDead() && OldFlagDef2->isDead() &&
10224 "Must have dead EFLAGS operand in reassociable instruction");
10225
10226 MachineOperand *NewFlagDef1 =
10227 NewMI1.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10228 MachineOperand *NewFlagDef2 =
10229 NewMI2.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10230
10231 assert(NewFlagDef1 && NewFlagDef2 &&
10232 "Unexpected operand in reassociable instruction");
10233
10234 // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
10235 // of this pass or other passes. The EFLAGS operands must be dead in these new
10236 // instructions because the EFLAGS operands in the original instructions must
10237 // be dead in order for reassociation to occur.
10238 NewFlagDef1->setIsDead();
10239 NewFlagDef2->setIsDead();
10240}
10241
10242std::pair<unsigned, unsigned>
10244 return std::make_pair(TF, 0u);
10245}
10246
10249 using namespace X86II;
10250 static const std::pair<unsigned, const char *> TargetFlags[] = {
10251 {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
10252 {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
10253 {MO_GOT, "x86-got"},
10254 {MO_GOTOFF, "x86-gotoff"},
10255 {MO_GOTPCREL, "x86-gotpcrel"},
10256 {MO_GOTPCREL_NORELAX, "x86-gotpcrel-norelax"},
10257 {MO_PLT, "x86-plt"},
10258 {MO_TLSGD, "x86-tlsgd"},
10259 {MO_TLSLD, "x86-tlsld"},
10260 {MO_TLSLDM, "x86-tlsldm"},
10261 {MO_GOTTPOFF, "x86-gottpoff"},
10262 {MO_INDNTPOFF, "x86-indntpoff"},
10263 {MO_TPOFF, "x86-tpoff"},
10264 {MO_DTPOFF, "x86-dtpoff"},
10265 {MO_NTPOFF, "x86-ntpoff"},
10266 {MO_GOTNTPOFF, "x86-gotntpoff"},
10267 {MO_DLLIMPORT, "x86-dllimport"},
10268 {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
10269 {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
10270 {MO_TLVP, "x86-tlvp"},
10271 {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
10272 {MO_SECREL, "x86-secrel"},
10273 {MO_COFFSTUB, "x86-coffstub"}};
10274 return ArrayRef(TargetFlags);
10275}
10276
10277namespace {
10278/// Create Global Base Reg pass. This initializes the PIC
10279/// global base register for x86-32.
10280struct CGBR : public MachineFunctionPass {
10281 static char ID;
10282 CGBR() : MachineFunctionPass(ID) {}
10283
10284 bool runOnMachineFunction(MachineFunction &MF) override {
10285 const X86TargetMachine *TM =
10286 static_cast<const X86TargetMachine *>(&MF.getTarget());
10287 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
10288
10289 // Only emit a global base reg in PIC mode.
10290 if (!TM->isPositionIndependent())
10291 return false;
10292
10294 Register GlobalBaseReg = X86FI->getGlobalBaseReg();
10295
10296 // If we didn't need a GlobalBaseReg, don't insert code.
10297 if (GlobalBaseReg == 0)
10298 return false;
10299
10300 // Insert the set of GlobalBaseReg into the first MBB of the function
10301 MachineBasicBlock &FirstMBB = MF.front();
10303 DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
10305 const X86InstrInfo *TII = STI.getInstrInfo();
10306
10307 Register PC;
10308 if (STI.isPICStyleGOT())
10309 PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
10310 else
10311 PC = GlobalBaseReg;
10312
10313 if (STI.is64Bit()) {
10314 if (TM->getCodeModel() == CodeModel::Large) {
10315 // In the large code model, we are aiming for this code, though the
10316 // register allocation may vary:
10317 // leaq .LN$pb(%rip), %rax
10318 // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
10319 // addq %rcx, %rax
10320 // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
10321 Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
10322 Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
10323 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
10324 .addReg(X86::RIP)
10325 .addImm(0)
10326 .addReg(0)
10328 .addReg(0);
10329 std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
10330 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
10331 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
10333 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
10334 .addReg(PBReg, RegState::Kill)
10335 .addReg(GOTReg, RegState::Kill);
10336 } else {
10337 // In other code models, use a RIP-relative LEA to materialize the
10338 // GOT.
10339 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
10340 .addReg(X86::RIP)
10341 .addImm(0)
10342 .addReg(0)
10343 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
10344 .addReg(0);
10345 }
10346 } else {
10347 // The operand of MovePCtoStack is completely ignored by the asm printer.
10348 // It's only used in JIT code emission as a displacement from the PC.
10349 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
10350
10351 // If we're using vanilla 'GOT' PIC style, we should use relative
10352 // addressing not to the PC, but to the _GLOBAL_OFFSET_TABLE_ external symbol.
10353 if (STI.isPICStyleGOT()) {
10354 // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
10355 // %some_register
10356 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
10357 .addReg(PC)
10358 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
10360 }
10361 }
10362
10363 return true;
10364 }
10365
10366 StringRef getPassName() const override {
10367 return "X86 PIC Global Base Reg Initialization";
10368 }
10369
10370 void getAnalysisUsage(AnalysisUsage &AU) const override {
10371 AU.setPreservesCFG();
10373 }
10374};
10375} // namespace
10376
10377char CGBR::ID = 0;
10379
10380namespace {
10381struct LDTLSCleanup : public MachineFunctionPass {
10382 static char ID;
10383 LDTLSCleanup() : MachineFunctionPass(ID) {}
10384
10385 bool runOnMachineFunction(MachineFunction &MF) override {
10386 if (skipFunction(MF.getFunction()))
10387 return false;
10388
10390 if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
10391 // No point folding accesses if there aren't at least two.
10392 return false;
10393 }
10394
10396 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
10397 return VisitNode(DT->getRootNode(), 0);
10398 }
10399
10400 // Visit the dominator subtree rooted at Node in pre-order.
10401 // If TLSBaseAddrReg is non-null, then use that to replace any
10402 // TLS_base_addr instructions. Otherwise, create the register
10403 // when the first such instruction is seen, and then use it
10404 // as we encounter more instructions.
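// For example (annotation): two TLS_base_addr64 calls where one dominates the
// other collapse into a single call whose result is cached in a virtual
// register and reused via a COPY at the second site.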
10405 bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
10406 MachineBasicBlock *BB = Node->getBlock();
10407 bool Changed = false;
10408
10409 // Traverse the current block.
10410 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
10411 ++I) {
10412 switch (I->getOpcode()) {
10413 case X86::TLS_base_addr32:
10414 case X86::TLS_base_addr64:
10415 if (TLSBaseAddrReg)
10416 I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
10417 else
10418 I = SetRegister(*I, &TLSBaseAddrReg);
10419 Changed = true;
10420 break;
10421 default:
10422 break;
10423 }
10424 }
10425
10426 // Visit the children of this block in the dominator tree.
10427 for (auto &I : *Node) {
10428 Changed |= VisitNode(I, TLSBaseAddrReg);
10429 }
10430
10431 return Changed;
10432 }
10433
10434 // Replace the TLS_base_addr instruction I with a copy from
10435 // TLSBaseAddrReg, returning the new instruction.
10436 MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
10437 unsigned TLSBaseAddrReg) {
10438 MachineFunction *MF = I.getParent()->getParent();
10439 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
10440 const bool is64Bit = STI.is64Bit();
10441 const X86InstrInfo *TII = STI.getInstrInfo();
10442
10443 // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
10445 BuildMI(*I.getParent(), I, I.getDebugLoc(),
10446 TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
10447 .addReg(TLSBaseAddrReg);
10448
10449 // Erase the TLS_base_addr instruction.
10450 I.eraseFromParent();
10451
10452 return Copy;
10453 }
10454
10455 // Create a virtual register in *TLSBaseAddrReg, and populate it by
10456 // inserting a copy instruction after I. Returns the new instruction.
10457 MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
10458 MachineFunction *MF = I.getParent()->getParent();
10459 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
10460 const bool is64Bit = STI.is64Bit();
10461 const X86InstrInfo *TII = STI.getInstrInfo();
10462
10463 // Create a virtual register for the TLS base address.
10465 *TLSBaseAddrReg = RegInfo.createVirtualRegister(
10466 is64Bit ? &X86::GR64RegClass : &X86::GR32RegClass);
10467
10468 // Insert a copy from RAX/EAX to TLSBaseAddrReg.
10469 MachineInstr *Next = I.getNextNode();
10470 MachineInstr *Copy = BuildMI(*I.getParent(), Next, I.getDebugLoc(),
10471 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
10472 .addReg(is64Bit ? X86::RAX : X86::EAX);
10473
10474 return Copy;
10475 }
10476
10477 StringRef getPassName() const override {
10478 return "Local Dynamic TLS Access Clean-up";
10479 }
10480
10481 void getAnalysisUsage(AnalysisUsage &AU) const override {
10482 AU.setPreservesCFG();
10485 }
10486};
10487} // namespace
10488
10489char LDTLSCleanup::ID = 0;
10491 return new LDTLSCleanup();
10492}
10493
10494/// Constants defining how certain sequences should be outlined.
10495///
10496/// \p MachineOutlinerDefault implies that the function is called with a call
10497/// instruction, and a return must be emitted for the outlined function frame.
10498///
10499/// That is,
10500///
10501/// I1 OUTLINED_FUNCTION:
10502/// I2 --> call OUTLINED_FUNCTION I1
10503/// I3 I2
10504/// I3
10505/// ret
10506///
10507/// * Call construction overhead: 1 (call instruction)
10508/// * Frame construction overhead: 1 (return instruction)
10509///
10510/// \p MachineOutlinerTailCall implies that the function is being tail called.
10511/// A jump is emitted instead of a call, and the return is already present in
10512/// the outlined sequence. That is,
10513///
10514/// I1 OUTLINED_FUNCTION:
10515/// I2 --> jmp OUTLINED_FUNCTION I1
10516/// ret I2
10517/// ret
10518///
10519/// * Call construction overhead: 1 (jump instruction)
10520/// * Frame construction overhead: 0 (don't need to return)
10521///
10523
10524std::optional<outliner::OutlinedFunction>
10526 const MachineModuleInfo &MMI,
10527 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
10528 unsigned SequenceSize = 0;
10529 for (auto &MI : RepeatedSequenceLocs[0]) {
10530 // FIXME: x86 doesn't implement getInstSizeInBytes, so
10531 // we can't tell the cost. Just assume each instruction
10532 // is one byte.
10533 if (MI.isDebugInstr() || MI.isKill())
10534 continue;
10535 SequenceSize += 1;
10536 }
10537
10538 // Check whether CFI instructions are present; if they are, count the
10539 // CFI instructions in the candidate sequence.
10540 unsigned CFICount = 0;
10541 for (auto &I : RepeatedSequenceLocs[0]) {
10542 if (I.isCFIInstruction())
10543 CFICount++;
10544 }
10545
10546 // We compare the number of found CFI Instructions to the number of CFI
10547 // instructions in the parent function for each candidate. We must check this
10548 // since if we outline one of the CFI instructions in a function, we have to
10549 // outline them all for correctness. If we do not, the address offsets will be
10550 // incorrect between the two sections of the program.
10551 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10552 std::vector<MCCFIInstruction> CFIInstructions =
10553 C.getMF()->getFrameInstructions();
10554
10555 if (CFICount > 0 && CFICount != CFIInstructions.size())
10556 return std::nullopt;
10557 }
10558
10559 // FIXME: Use real size in bytes for call and ret instructions.
10560 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10561 for (outliner::Candidate &C : RepeatedSequenceLocs)
10562 C.setCallInfo(MachineOutlinerTailCall, 1);
10563
10564 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
10565 0, // Number of bytes to emit frame.
10566 MachineOutlinerTailCall // Type of frame.
10567 );
10568 }
10569
10570 if (CFICount > 0)
10571 return std::nullopt;
10572
10573 for (outliner::Candidate &C : RepeatedSequenceLocs)
10574 C.setCallInfo(MachineOutlinerDefault, 1);
10575
10576 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 1,
10578}
10579
10581 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10582 const Function &F = MF.getFunction();
10583
10584 // Does the function use a red zone? If it does, then we can't risk messing
10585 // with the stack.
10586 if (Subtarget.getFrameLowering()->has128ByteRedZone(MF)) {
10587 // It could have a red zone. If it does, then we don't want to touch it.
10589 if (!X86FI || X86FI->getUsesRedZone())
10590 return false;
10591 }
10592
10593 // If we *don't* want to outline from things that could potentially be deduped
10594 // then return false.
10595 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10596 return false;
10597
10598 // This function is viable for outlining, so return true.
10599 return true;
10600}
10601
10605 unsigned Flags) const {
10606 MachineInstr &MI = *MIT;
10607
10608 // Is this a terminator for a basic block?
10609 if (MI.isTerminator())
10610 // TargetInstrInfo::getOutliningType has already filtered out anything
10611 // that would break this, so we can allow it here.
10613
10614 // Don't outline anything that modifies or reads from the stack pointer.
10615 //
10616 // FIXME: There are instructions which are being manually built without
10617 // explicit uses/defs so we also have to check the MCInstrDesc. We should be
10618 // able to remove the extra checks once those are fixed up. For example,
10619 // sometimes we might get something like %rax = POP64r 1. This won't be
10620 // caught by modifiesRegister or readsRegister even though the instruction
10621 // really ought to be formed so that modifiesRegister/readsRegister would
10622 // catch it.
10623 if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
10624 MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
10625 MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
10627
10628 // Outlined calls change the instruction pointer, so don't read from it.
10629 if (MI.readsRegister(X86::RIP, &RI) ||
10630 MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
10631 MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
10633
10634 // Don't outline CFI instructions.
10635 if (MI.isCFIInstruction())
10637
10639}
10640
10643 const outliner::OutlinedFunction &OF) const {
10644 // If we're a tail call, we already have a return, so don't do anything.
10646 return;
10647
10648 // We're a normal call, so our sequence doesn't have a return instruction.
10649 // Add it in.
10650 MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RET64));
10651 MBB.insert(MBB.end(), retq);
10652}
10653
10657 // Is it a tail call?
10658 if (C.CallConstructionID == MachineOutlinerTailCall) {
10659 // Yes, just insert a JMP.
10660 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
10661 .addGlobalAddress(M.getNamedValue(MF.getName())));
10662 } else {
10663 // No, insert a call.
10664 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
10665 .addGlobalAddress(M.getNamedValue(MF.getName())));
10666 }
10667
10668 return It;
10669}
10670
10673 DebugLoc &DL,
10674 bool AllowSideEffects) const {
10675 const MachineFunction &MF = *MBB.getParent();
10676 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
10678
10679 if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
10680 // FIXME: Should we ignore MMX registers?
10681 return;
10682
10683 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10684 // Convert register to the 32-bit version. Both 'movl' and 'xorl' clear the
10685 // upper bits of a 64-bit register automagically.
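// For example (annotation): clearing RAX emits `xorl %eax, %eax`, or
// `movl $0, %eax` when EFLAGS must be preserved.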
10686 Reg = getX86SubSuperRegister(Reg, 32);
10687
10688 if (!AllowSideEffects)
10689 // XOR affects flags, so use a MOV instead.
10690 BuildMI(MBB, Iter, DL, get(X86::MOV32ri), Reg).addImm(0);
10691 else
10692 BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg)
10693 .addReg(Reg, RegState::Undef)
10694 .addReg(Reg, RegState::Undef);
10695 } else if (X86::VR128RegClass.contains(Reg)) {
10696 // XMM#
10697 if (!ST.hasSSE1())
10698 return;
10699
10700 // PXOR is safe to use because it doesn't affect flags.
10701 BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
10702 .addReg(Reg, RegState::Undef)
10703 .addReg(Reg, RegState::Undef);
10704 } else if (X86::VR256RegClass.contains(Reg)) {
10705 // YMM#
10706 if (!ST.hasAVX())
10707 return;
10708
10709 // VPXOR is safe to use because it doesn't affect flags.
10710 BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg)
10711 .addReg(Reg, RegState::Undef)
10712 .addReg(Reg, RegState::Undef);
10713 } else if (X86::VR512RegClass.contains(Reg)) {
10714 // ZMM#
10715 if (!ST.hasAVX512())
10716 return;
10717
10718 // VPXORY is safe to use because it doesn't affect flags.
10719 BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
10720 .addReg(Reg, RegState::Undef)
10721 .addReg(Reg, RegState::Undef);
10722 } else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
10723 X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) ||
10724 X86::VK16RegClass.contains(Reg)) {
10725 if (!ST.hasVLX())
10726 return;
10727
10728 // KXOR is safe to use because it doesn't affect flags.
10729 unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
10730 BuildMI(MBB, Iter, DL, get(Op), Reg)
10731 .addReg(Reg, RegState::Undef)
10732 .addReg(Reg, RegState::Undef);
10733 }
10734}
10735
10737 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
10738 bool DoRegPressureReduce) const {
10739 unsigned Opc = Root.getOpcode();
10740 switch (Opc) {
10741 case X86::VPDPWSSDrr:
10742 case X86::VPDPWSSDrm:
10743 case X86::VPDPWSSDYrr:
10744 case X86::VPDPWSSDYrm: {
10745 if (!Subtarget.hasFastDPWSSD()) {
10747 return true;
10748 }
10749 break;
10750 }
10751 case X86::VPDPWSSDZ128r:
10752 case X86::VPDPWSSDZ128m:
10753 case X86::VPDPWSSDZ256r:
10754 case X86::VPDPWSSDZ256m:
10755 case X86::VPDPWSSDZr:
10756 case X86::VPDPWSSDZm: {
10757 if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10759 return true;
10760 }
10761 break;
10762 }
10763 }
10765 Patterns, DoRegPressureReduce);
10766}
10767
10768static void
10772 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
10773 MachineFunction *MF = Root.getMF();
10775
10776 unsigned Opc = Root.getOpcode();
10777 unsigned AddOpc = 0;
10778 unsigned MaddOpc = 0;
10779 switch (Opc) {
10780 default:
10781 assert(false && "It should not reach here");
10782 break;
10783 // vpdpwssd xmm2,xmm3,xmm1
10784 // -->
10785 // vpmaddwd xmm3,xmm3,xmm1
10786 // vpaddd xmm2,xmm2,xmm3
10787 case X86::VPDPWSSDrr:
10788 MaddOpc = X86::VPMADDWDrr;
10789 AddOpc = X86::VPADDDrr;
10790 break;
10791 case X86::VPDPWSSDrm:
10792 MaddOpc = X86::VPMADDWDrm;
10793 AddOpc = X86::VPADDDrr;
10794 break;
10795 case X86::VPDPWSSDZ128r:
10796 MaddOpc = X86::VPMADDWDZ128rr;
10797 AddOpc = X86::VPADDDZ128rr;
10798 break;
10799 case X86::VPDPWSSDZ128m:
10800 MaddOpc = X86::VPMADDWDZ128rm;
10801 AddOpc = X86::VPADDDZ128rr;
10802 break;
10803 // vpdpwssd ymm2,ymm3,ymm1
10804 // -->
10805 // vpmaddwd ymm3,ymm3,ymm1
10806 // vpaddd ymm2,ymm2,ymm3
10807 case X86::VPDPWSSDYrr:
10808 MaddOpc = X86::VPMADDWDYrr;
10809 AddOpc = X86::VPADDDYrr;
10810 break;
10811 case X86::VPDPWSSDYrm:
10812 MaddOpc = X86::VPMADDWDYrm;
10813 AddOpc = X86::VPADDDYrr;
10814 break;
10815 case X86::VPDPWSSDZ256r:
10816 MaddOpc = X86::VPMADDWDZ256rr;
10817 AddOpc = X86::VPADDDZ256rr;
10818 break;
10819 case X86::VPDPWSSDZ256m:
10820 MaddOpc = X86::VPMADDWDZ256rm;
10821 AddOpc = X86::VPADDDZ256rr;
10822 break;
10823 // vpdpwssd zmm2,zmm3,zmm1
10824 // -->
10825 // vpmaddwd zmm3,zmm3,zmm1
10826 // vpaddd zmm2,zmm2,zmm3
10827 case X86::VPDPWSSDZr:
10828 MaddOpc = X86::VPMADDWDZrr;
10829 AddOpc = X86::VPADDDZrr;
10830 break;
10831 case X86::VPDPWSSDZm:
10832 MaddOpc = X86::VPMADDWDZrm;
10833 AddOpc = X86::VPADDDZrr;
10834 break;
10835 }
10836 // Create vpmaddwd.
10837 const TargetRegisterClass *RC =
10838 RegInfo.getRegClass(Root.getOperand(0).getReg());
10839 Register NewReg = RegInfo.createVirtualRegister(RC);
10840 MachineInstr *Madd = Root.getMF()->CloneMachineInstr(&Root);
10841 Madd->setDesc(TII.get(MaddOpc));
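// Drop the tied accumulator operand; VPMADDWD only reads the two
// multiplicand sources.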
10842 Madd->untieRegOperand(1);
10843 Madd->removeOperand(1);
10844 Madd->getOperand(0).setReg(NewReg);
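// Record that InsInstrs[0] (the VPMADDWD) defines the new virtual register.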
10845 InstrIdxForVirtReg.insert(std::make_pair(NewReg, 0));
10846 // Create vpaddd.
10847 Register DstReg = Root.getOperand(0).getReg();
10848 bool IsKill = Root.getOperand(1).isKill();
10849 MachineInstr *Add =
10850 BuildMI(*MF, MIMetadata(Root), TII.get(AddOpc), DstReg)
10851 .addReg(Root.getOperand(1).getReg(), getKillRegState(IsKill))
10852 .addReg(Madd->getOperand(0).getReg(), getKillRegState(true));
10853 InsInstrs.push_back(Madd);
10854 InsInstrs.push_back(Add);
10855 DelInstrs.push_back(&Root);
10856}
10857
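// Dispatch combiner patterns: the X86-specific DPWSSD pattern is expanded
// here; everything else falls back to the generic reassociation path.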
10858 void X86InstrInfo::genAlternativeCodeSequence(
10859 MachineInstr &Root, unsigned Pattern,
10860 SmallVectorImpl<MachineInstr *> &InsInstrs,
10861 SmallVectorImpl<MachineInstr *> &DelInstrs,
10862 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
10863 switch (Pattern) {
10864 default:
10865 // Reassociate instructions.
10866 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
10867 DelInstrs, InstrIdxForVirtReg);
10868 return;
10869 case X86MachineCombinerPattern::DPWSSD:
10870 genAlternativeDpCodeSequence(Root, *this, InsInstrs, DelInstrs,
10871 InstrIdxForVirtReg);
10872 return;
10873 }
10874}
10875
10876// See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
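// Expand frame index FI into the full five-operand X86 memory reference
// (base, scale, index, displacement, segment) so it can be spliced into an
// instruction's address operands.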
10877 void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
10878 int FI) const {
10879 X86AddressMode M;
10880 M.BaseType = X86AddressMode::FrameIndexBase;
10881 M.Base.FrameIndex = FI;
10882 M.getFullAddress(Ops);
10883}
10884
10885#define GET_INSTRINFO_HELPERS
10886#include "X86GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerDefault
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:109
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
Expand a single-def pseudo instruction to a two-addr instruction with two undef reads of the register...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool IsDead
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
#define FROM_TO(FROM, TO)
static bool is64Bit(const char *name)
#define GET_EGPR_IF_ENABLED(OPC)
static bool isLEA(unsigned Opcode)
static void addOperands(MachineInstrBuilder &MIB, ArrayRef< MachineOperand > MOs, int PtrOffset=0)
static std::optional< ParamLoadedValue > describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetRegisterInfo *TRI)
If DescribedReg overlaps with the MOVrr instruction's destination register then, if possible,...
static cl::opt< unsigned > PartialRegUpdateClearance("partial-reg-update-clearance", cl::desc("Clearance between two register writes " "for inserting XOR to avoid partial " "register update"), cl::init(64), cl::Hidden)
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI)
static bool isConvertibleLEA(MachineInstr *MI)
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, const X86Subtarget &Subtarget)
static bool isAMXOpcode(unsigned Opc)
static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI, Register Reg)
static void updateOperandRegConstraints(MachineFunction &MF, MachineInstr &NewMI, const TargetInstrInfo &TII)
static bool findRedundantFlagInstr(MachineInstr &CmpInstr, MachineInstr &CmpValDefInstr, const MachineRegisterInfo *MRI, MachineInstr **AndInstr, const TargetRegisterInfo *TRI, bool &NoSignFlag, bool &ClearsOverflowFlag)
static int getJumpTableIndexFromAddr(const MachineInstr &MI)
static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, unsigned NewWidth, unsigned *pNewMask=nullptr)
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne)
static unsigned getNewOpcFromTable(ArrayRef< X86TableEntry > Table, unsigned Opc)
static unsigned getStoreRegOpcode(Register SrcReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
#define FOLD_BROADCAST(SIZE)
static cl::opt< unsigned > UndefRegClearance("undef-reg-clearance", cl::desc("How many idle instructions we would like before " "certain undef register reads"), cl::init(128), cl::Hidden)
#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64)
static bool isTruncatedShiftCountForLEA(unsigned ShAmt)
Check whether the given shift count is appropriate can be represented by a LEA instruction.
static cl::opt< bool > ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden)
static SmallVector< MachineMemOperand *, 2 > extractLoadMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static MachineInstr * fuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII)
static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx)
static bool canConvert2Copy(unsigned Opc)
static cl::opt< bool > NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions"), cl::Hidden)
static bool expandNOVLXStore(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx)
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes)
static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc, Register Reg)
Expand a single-def pseudo instruction to a two-addr instruction with two k0 reads.
#define VPERM_CASES_BROADCAST(Suffix)
static X86::CondCode isUseDefConvertible(const MachineInstr &MI)
Check whether the use can be converted to remove a comparison against zero.
static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
static unsigned getLoadRegOpcode(Register DestReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, bool ForLoadFold=false)
static MachineInstr * makeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI)
#define GET_ND_IF_ENABLED(OPC)
static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget, bool ForLoadFold=false)
Return true for all instructions that only update the first 32 or 64-bits of the destination register...
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, const X86Subtarget &Subtarget)
static const uint16_t * lookupAVX512(unsigned opcode, unsigned domain, ArrayRef< uint16_t[4]> Table)
static unsigned getLoadStoreRegOpcode(Register Reg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI, bool Load)
#define VPERM_CASES(Suffix)
#define FROM_TO_SIZE(A, B, S)
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes)
static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, bool &ClearsOverflowFlag)
Check whether the definition can be converted to remove a comparison against zero.
static bool isHReg(unsigned Reg)
Test if the given register is a physical h register.
static MachineInstr * fuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII, int PtrOffset=0)
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode)
static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static MachineBasicBlock * getFallThroughMBB(MachineBasicBlock *MBB, MachineBasicBlock *TBB)
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineInstr &UserMI, const MachineFunction &MF)
Check if LoadMI is a partial register load that we can't fold into MI because the latter uses content...
static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI)
static cl::opt< bool > PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to" " fuse, but the X86 backend currently can't"), cl::Hidden)
static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx)
#define CASE_ND(OP)
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
This determines which of three possible cases of a three source commute the source indexes correspond...
static bool isX87Reg(unsigned Reg)
Return true if the Reg is X87 register.
static void genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
static unsigned getTruncatedShiftCount(const MachineInstr &MI, unsigned ShiftAmtOperandIdx)
Check whether the shift count for a machine operand is non-zero.
static SmallVector< MachineMemOperand *, 2 > extractStoreMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static unsigned getBroadcastOpcode(const X86FoldTableEntry *I, const TargetRegisterClass *RC, const X86Subtarget &STI)
static unsigned convertALUrr2ALUri(unsigned Opc)
Convert an ALUrr opcode to corresponding ALUri opcode.
static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI)
Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
static bool isCommutableVPERMV3Instruction(unsigned Opcode)
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:760
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:768
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:766
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:773
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:767
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
DWARF expression.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
static DIExpression * appendExt(const DIExpression *Expr, unsigned FromSize, unsigned ToSize, bool Signed)
Append a zero- or sign-extension to Expr.
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Base class for the actual dominator tree node.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:705
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
void stepForward(const MachineInstr &MI, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand * > > &Clobbers)
Simulates liveness when stepping forward over an instruction(bundle).
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:408
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:759
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:581
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
unsigned char NumDefs
Definition: MCInstrDesc.h:207
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
Set of metadata that should be preserved when using BuildMI().
SimpleValueType SimpleTy
unsigned pred_size() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
@ LQR_Dead
Register is known to be fully dead.
This class is a data container for one entry in a MachineConstantPool.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
union llvm::MachineConstantPoolEntry::@204 Val
The constant itself.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * getRootNode() const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
mop_iterator operands_begin()
Definition: MachineInstr.h:679
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
void dropDebugNumber()
Drop any variable location debugging information associated with this instruction.
Definition: MachineInstr.h:555
void setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just prior to the instruction itself.
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
bool isSafeToMove(bool &SawStore) const
Return true if it is safe to move this instruction.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:566
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:815
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:403
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
unsigned getNumDefs() const
Returns the total number of definitions.
Definition: MachineInstr.h:638
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateCPI(unsigned Idx, int Offset, unsigned TargetFlags=0)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
static MachineOperand CreateFI(int Idx)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const TargetRegisterInfo * getTargetRegisterInfo() const
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
Definition: SlotIndexes.h:224
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const
Return true when \P Inst has reassociable operands in the same \P MBB.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum,...
bool isPositionIndependent() const
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TypeSize getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
LLVM Value Representation.
Definition: Value.h:74
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void getFrameIndexOperands(SmallVectorImpl< MachineOperand > &Ops, int FI) const override
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
Check if there exists an earlier instruction that operates on the same source operands and sets eflag...
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Overrides the isSchedulingBoundary from Codegen/TargetInstrInfo.cpp to make it capable of identifying...
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
Given a machine instruction descriptor, returns the register class constraint for OpNum,...
void replaceBranchWithTailCall(MachineBasicBlock &MBB, SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the the global base register value.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Returns true iff the routine could find two commutable operands in the given machine instruction.
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value l...
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
const X86RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: X86InstrInfo.h:211
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override
Returns true if we have preference on the operands order in MI, the commute decision is returned in C...
bool hasLiveCondCodeDef(MachineInstr &MI) const
True if MI has a condition code def, e.g.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool canMakeTailCallConditional(SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
convertToThreeAddress - This method must be implemented by targets that set the M_CONVERTIBLE_TO_3_AD...
X86InstrInfo(X86Subtarget &STI)
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
MCInst getNop() const override
Return the noop instruction to use for a noop.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
This is a used by the pre-regalloc scheduler to determine (in conjunction with areLoadsFromSameBasePt...
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr * > &NewMIs) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
Fold a load or store of the specified stack slot into the specified machine instruction for the speci...
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds potential patterns, this function generates the instructions ...
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isStoreToStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
bool isUnconditionalTailCall(const MachineInstr &MI) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isLoadFromStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const
int getSPAdjust(const MachineInstr &MI) const override
getSPAdjust - This returns the stack pointer adjustment made by this instruction.
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
int getJumpTableIndex(const MachineInstr &MI) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override
This is an architecture-specific helper function of reassociateOps.
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
isCoalescableExtInstr - Return true if the instruction is a "coalescable" extension instruction.
void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Opc, Register Reg, int FrameIdx, bool isKill=false) const
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned LEAOpcode, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV, LiveIntervals *LIS) const
Given an operand within a MachineInstr, insert preceding code to put it into the right format for a p...
bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify=false) const override
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value o...
unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before certain undef register...
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override
uint16_t getExecutionDomainCustom(const MachineInstr &MI) const
bool isHighLatencyDef(int opc) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, const X86InstrFMA3Group &FMA3Group) const
Returns an adjusted FMA opcode that must be used in FMA instruction that performs the same computatio...
bool preservesZeroValueInReg(const MachineInstr *MI, const Register NullValueReg, const TargetRegisterInfo *TRI) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before a partial register upd...
MachineInstr * optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, Register &FoldAsLoadDefReg, MachineInstr *&DefMI) const override
Try to remove the load by folding it to a register operand at the use.
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
unsigned getNumLocalDynamicTLSAccesses() const
bool canRealignStack(const MachineFunction &MF) const override
bool isPICStyleGOT() const
Definition: X86Subtarget.h:328
bool canUseCMOV() const
Definition: X86Subtarget.h:192
bool isTargetWin64() const
Definition: X86Subtarget.h:324
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool hasAVX512() const
Definition: X86Subtarget.h:201
bool hasSSE41() const
Definition: X86Subtarget.h:197
bool hasSSE2() const
Definition: X86Subtarget.h:194
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:132
bool hasAVX() const
Definition: X86Subtarget.h:199
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:124
bool hasAVX2() const
Definition: X86Subtarget.h:200
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
@ OPERAND_MEMORY
Definition: MCInstrDesc.h:62
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ X86
Windows x64, Windows Itanium (IA-64)
Reg
All possible values of the reg field in the ModR/M byte.
bool isKMergeMasked(uint64_t TSFlags)
Definition: X86BaseInfo.h:1319
@ MO_GOT_ABSOLUTE_ADDRESS
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [.
Definition: X86BaseInfo.h:367
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the G...
Definition: X86BaseInfo.h:432
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
Definition: X86BaseInfo.h:456
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
Definition: X86BaseInfo.h:425
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:371
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
bool canUseApxExtendedReg(const MCInstrDesc &Desc)
Definition: X86BaseInfo.h:1260
bool isPseudo(uint64_t TSFlags)
Definition: X86BaseInfo.h:887
bool isKMasked(uint64_t TSFlags)
Definition: X86BaseInfo.h:1314
int getMemoryOperandNo(uint64_t TSFlags)
Definition: X86BaseInfo.h:1011
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:968
@ EVEX
EVEX - Specifies that this instruction use EVEX form which provides syntax support up to 32 512-bit r...
Definition: X86BaseInfo.h:825
@ SSEDomainShift
Execution domain for SSE instructions.
Definition: X86BaseInfo.h:811
CondCode getCondFromBranch(const MachineInstr &MI)
CondCode getCondFromCFCMov(const MachineInstr &MI)
@ LAST_VALID_COND
Definition: X86BaseInfo.h:94
CondCode getCondFromMI(const MachineInstr &MI)
Return the condition code of the instruction.
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference,...
@ AddrScaleAmt
Definition: X86BaseInfo.h:30
@ AddrSegmentReg
Definition: X86BaseInfo.h:34
@ AddrIndexReg
Definition: X86BaseInfo.h:31
@ AddrNumOperands
Definition: X86BaseInfo.h:36
unsigned getSwappedVCMPImm(unsigned Imm)
Get the VCMP immediate if the opcodes are swapped.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
unsigned getSwappedVPCOMImm(unsigned Imm)
Get the VPCOM immediate if the opcodes are swapped.
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
unsigned getNonNDVariant(unsigned Opc)
unsigned getVPCMPImmForCond(ISD::CondCode CC)
Get the VPCMP immediate for the given condition.
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should b...
CondCode getCondFromSETCC(const MachineInstr &MI)
unsigned getSwappedVPCMPImm(unsigned Imm)
Get the VPCMP immediate if the opcodes are swapped.
CondCode getCondFromCCMP(const MachineInstr &MI)
int getCCMPCondFlagsFromCondCode(CondCode CC)
int getCondSrcNoFromDesc(const MCInstrDesc &MCID)
Return the source operand # for condition code by MCID.
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false, bool HasNDD=false)
Return a cmov opcode for the given register size in bytes, and operand type.
unsigned getNFVariant(unsigned Opc)
unsigned getVectorRegisterWidth(const MCOperandInfo &Info)
Get the width of the vector register operand.
CondCode getCondFromCMov(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:170
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
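A sketch of typical queries; the register names are the obvious architectural ones and the declaring header is assumed to be pulled in via the X86 backend headers.

#include "X86InstrInfo.h"

static void subRegExample() {
  // Sizes are in bits: the 16-bit view of EAX and its high 8-bit view.
  llvm::MCRegister AX = llvm::getX86SubSuperRegister(llvm::X86::EAX, 16);
  llvm::MCRegister AH =
      llvm::getX86SubSuperRegister(llvm::X86::EAX, 8, /*High=*/true);
  (void)AX;
  (void)AH;
}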
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FunctionPass * createX86GlobalBaseRegPass()
This pass initializes a global base register for PIC on x86-32.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
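Sketch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static void appendExample() {
  llvm::SmallVector<int, 8> Dst;
  const int Src[] = {1, 2, 3};
  // Equivalent to Dst.insert(Dst.end(), std::begin(Src), std::end(Src)).
  llvm::append_range(Dst, Src); // Dst now holds 1, 2, 3.
}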
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
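A hedged sketch of the customary pairing with BuildMI when reloading from a stack slot; the opcode and destination register are placeholders, and the block, insertion point and TargetInstrInfo are assumed to come from the surrounding pass.

#include "X86InstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

static void emitReload(llvm::MachineBasicBlock &MBB,
                       llvm::MachineBasicBlock::iterator MI,
                       const llvm::TargetInstrInfo &TII,
                       const llvm::DebugLoc &DL, int FrameIdx) {
  using namespace llvm;
  // MOV32rm EAX, [FrameIdx]; addFrameReference appends the five x86 address
  // operands (base, scale, index, displacement, segment) for the slot and
  // attaches a MachineMemOperand describing the access.
  addFrameReference(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), X86::EAX),
                    FrameIdx);
}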
MaybeAlign getAlign(const Function &F, unsigned Index)
FunctionPass * createCleanupLocalDynamicTLSPass()
This pass combines multiple accesses to local-dynamic TLS variables so that the TLS base address for ...
const X86FoldTableEntry * lookupBroadcastFoldTable(unsigned RegOp, unsigned OpNum)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
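A self-contained sketch covering countr_zero together with popcount from earlier in this list:

#include "llvm/ADT/bit.h"
#include <cassert>
#include <cstdint>

static void bitHelpers() {
  uint32_t Mask = 0xF0;                  // bits 4..7 set
  assert(llvm::popcount(Mask) == 4);     // four bits set
  assert(llvm::countr_zero(Mask) == 4);  // bits 0..3 are clear
}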
const X86InstrFMA3Group * getFMA3Group(unsigned Opcode, uint64_t TSFlags)
Returns a reference to the group of FMA3 opcodes that includes the given Opcode.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
const X86FoldTableEntry * lookupTwoAddrFoldTable(unsigned RegOp)
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
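A hedged sketch of addRegOffset (above) and addRegReg feeding LEAs; the registers, insertion point and TargetInstrInfo are assumed to be provided by the enclosing code.

// LEA64r Dst, [Base + 8]
addRegOffset(BuildMI(MBB, MI, DL, TII.get(X86::LEA64r), DstReg),
             BaseReg, /*isKill=*/false, /*Offset=*/8);
// LEA64r Dst2, [Base1 + Base2]
addRegReg(BuildMI(MBB, MI, DL, TII.get(X86::LEA64r), DstReg2),
          Base1, /*isKill1=*/false, Base2, /*isKill2=*/false);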
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
unsigned getUndefRegState(bool B)
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
unsigned getDefRegState(bool B)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1961
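Sketch of the range form, e.g. a membership probe over a sorted table:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// True when Key is present in the sorted table; llvm::lower_bound forwards
// to std::lower_bound over the whole range.
static bool containsSorted(llvm::ArrayRef<unsigned> Sorted, unsigned Key) {
  auto It = llvm::lower_bound(Sorted, Key);
  return It != Sorted.end() && *It == Key;
}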
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
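These helpers each fold a boolean into the matching RegState flag (or 0), so they compose when adding register operands. A sketch, with the surrounding builder state assumed:

// Copy SrcReg into DstReg, marking the source killed only when SrcIsKill is
// true; getKillRegState(false) is simply 0.
BuildMI(MBB, MI, DL, TII.get(X86::MOV32rr), DstReg)
    .addReg(SrcReg, getKillRegState(SrcIsKill));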
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
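A self-contained sketch combining alignTo with isAligned from earlier in the list:

#include "llvm/Support/Alignment.h"
#include <cassert>

static void alignmentHelpers() {
  llvm::Align A(16);
  assert(llvm::isAligned(A, 32));        // 32 is a multiple of 16
  assert(!llvm::isAligned(A, 24));
  assert(llvm::alignTo(24, A) == 32);    // round 24 up to the next multiple of 16
}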
@ DPWSSD
Definition: X86InstrInfo.h:32
const X86FoldTableEntry * lookupUnfoldTable(unsigned MemOp)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool matchBroadcastSize(const X86FoldTableEntry &Entry, unsigned BroadcastBits)
const X86FoldTableEntry * lookupFoldTable(unsigned RegOp, unsigned OpNum)
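A hedged sketch of a fold-table query; the DstOp field name and the MOV32rr/MOV32rm pairing reflect the usual register-to-memory fold but are stated here as assumptions rather than taken from this file.

// Ask whether operand 1 (the source) of MOV32rr can be folded into a load.
if (const llvm::X86FoldTableEntry *Entry =
        llvm::lookupFoldTable(llvm::X86::MOV32rr, /*OpNum=*/1)) {
  unsigned MemOpc = Entry->DstOp; // expected to be the memory form, MOV32rm
  (void)MemOpc;
}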
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
Extended Value Type.
Definition: ValueTypes.h:35
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
Used to describe an addressing mode, similar to ExtAddrMode in CodeGenPrepare.
This represents a simple continuous liveness interval for a value.
Definition: LiveInterval.h:162
std::vector< MachineInstr * > Kills
Kills - List of MachineInstrs that are the last use of this virtual register (i.e. kill it) in the...
Definition: LiveVariables.h:88
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
X86AddressMode - This struct holds a generalized full x86 address mode.
This class is used to group {132, 213, 231} forms of FMA opcodes together.
unsigned get213Opcode() const
Returns the 213 form of FMA opcode.
unsigned get231Opcode() const
Returns the 231 form of FMA opcode.
bool isIntrinsic() const
Returns true iff the group of FMA opcodes holds intrinsic opcodes.
unsigned get132Opcode() const
Returns the 132 form of FMA opcode.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.