51#define DEBUG_TYPE "x86-instr-info"
53#define GET_INSTRINFO_CTOR_DTOR
54#include "X86GenInstrInfo.inc"
58 cl::desc(
"Disable fusing of spill code into instructions"),
62 cl::desc(
"Print instructions that the allocator wants to"
63 " fuse, but the X86 backend currently can't"),
67 cl::desc(
"Re-materialize load from stub in PIC mode"),
71 cl::desc(
"Clearance between two register writes "
72 "for inserting XOR to avoid partial "
76 "undef-reg-clearance",
77 cl::desc(
"How many idle instructions we would like before "
78 "certain undef register reads"),
82void X86InstrInfo::anchor() {}
86 : X86::ADJCALLSTACKDOWN32),
87 (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
88 : X86::ADJCALLSTACKUP32),
89 X86::CATCHRET, (STI.
is64Bit() ? X86::RET64 : X86::RET32)),
90 Subtarget(STI), RI(STI.getTargetTriple()) {}
99 if (!RC || !Subtarget.hasEGPR())
105 switch (RC->getID()) {
108 case X86::GR8RegClassID:
109 return &X86::GR8_NOREX2RegClass;
110 case X86::GR16RegClassID:
111 return &X86::GR16_NOREX2RegClass;
112 case X86::GR32RegClassID:
113 return &X86::GR32_NOREX2RegClass;
114 case X86::GR64RegClassID:
115 return &X86::GR64_NOREX2RegClass;
116 case X86::GR32_NOSPRegClassID:
117 return &X86::GR32_NOREX2_NOSPRegClass;
118 case X86::GR64_NOSPRegClassID:
119 return &X86::GR64_NOREX2_NOSPRegClass;
125 unsigned &SubIdx)
const {
126 switch (
MI.getOpcode()) {
129 case X86::MOVSX16rr8:
130 case X86::MOVZX16rr8:
131 case X86::MOVSX32rr8:
132 case X86::MOVZX32rr8:
133 case X86::MOVSX64rr8:
134 if (!Subtarget.is64Bit())
139 case X86::MOVSX32rr16:
140 case X86::MOVZX32rr16:
141 case X86::MOVSX64rr16:
142 case X86::MOVSX64rr32: {
143 if (
MI.getOperand(0).getSubReg() ||
MI.getOperand(1).getSubReg())
146 SrcReg =
MI.getOperand(1).getReg();
147 DstReg =
MI.getOperand(0).getReg();
148 switch (
MI.getOpcode()) {
151 case X86::MOVSX16rr8:
152 case X86::MOVZX16rr8:
153 case X86::MOVSX32rr8:
154 case X86::MOVZX32rr8:
155 case X86::MOVSX64rr8:
156 SubIdx = X86::sub_8bit;
158 case X86::MOVSX32rr16:
159 case X86::MOVZX32rr16:
160 case X86::MOVSX64rr16:
161 SubIdx = X86::sub_16bit;
163 case X86::MOVSX64rr32:
164 SubIdx = X86::sub_32bit;
174 if (
MI.mayLoad() ||
MI.mayStore())
179 if (
MI.isCopyLike() ||
MI.isInsertSubreg())
182 unsigned Opcode =
MI.getOpcode();
193 if (isBSF(Opcode) || isBSR(Opcode) || isLZCNT(Opcode) || isPOPCNT(Opcode) ||
199 if (isBLCFILL(Opcode) || isBLCI(Opcode) || isBLCIC(Opcode) ||
200 isBLCMSK(Opcode) || isBLCS(Opcode) || isBLSFILL(Opcode) ||
201 isBLSI(Opcode) || isBLSIC(Opcode) || isBLSMSK(Opcode) || isBLSR(Opcode) ||
206 if (isBEXTR(Opcode) || isBZHI(Opcode))
209 if (isROL(Opcode) || isROR(Opcode) || isSAR(Opcode) || isSHL(Opcode) ||
210 isSHR(Opcode) || isSHLD(Opcode) || isSHRD(Opcode))
213 if (isADC(Opcode) || isADD(Opcode) || isAND(Opcode) || isOR(Opcode) ||
214 isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode))
220 if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode))
228 if (isMOVSX(Opcode) || isMOVZX(Opcode) || isMOVSXD(Opcode) || isMOV(Opcode))
231 if (isRORX(Opcode) || isSARX(Opcode) || isSHLX(Opcode) || isSHRX(Opcode))
241 switch (
MI.getOpcode()) {
254 case X86::IMUL64rmi32:
269 case X86::POPCNT16rm:
270 case X86::POPCNT32rm:
271 case X86::POPCNT64rm:
279 case X86::BLCFILL32rm:
280 case X86::BLCFILL64rm:
285 case X86::BLCMSK32rm:
286 case X86::BLCMSK64rm:
289 case X86::BLSFILL32rm:
290 case X86::BLSFILL64rm:
295 case X86::BLSMSK32rm:
296 case X86::BLSMSK64rm:
306 case X86::BEXTRI32mi:
307 case X86::BEXTRI64mi:
360 case X86::CVTTSD2SI64rm:
361 case X86::VCVTTSD2SI64rm:
362 case X86::VCVTTSD2SI64Zrm:
363 case X86::CVTTSD2SIrm:
364 case X86::VCVTTSD2SIrm:
365 case X86::VCVTTSD2SIZrm:
366 case X86::CVTTSS2SI64rm:
367 case X86::VCVTTSS2SI64rm:
368 case X86::VCVTTSS2SI64Zrm:
369 case X86::CVTTSS2SIrm:
370 case X86::VCVTTSS2SIrm:
371 case X86::VCVTTSS2SIZrm:
372 case X86::CVTSI2SDrm:
373 case X86::VCVTSI2SDrm:
374 case X86::VCVTSI2SDZrm:
375 case X86::CVTSI2SSrm:
376 case X86::VCVTSI2SSrm:
377 case X86::VCVTSI2SSZrm:
378 case X86::CVTSI642SDrm:
379 case X86::VCVTSI642SDrm:
380 case X86::VCVTSI642SDZrm:
381 case X86::CVTSI642SSrm:
382 case X86::VCVTSI642SSrm:
383 case X86::VCVTSI642SSZrm:
384 case X86::CVTSS2SDrm:
385 case X86::VCVTSS2SDrm:
386 case X86::VCVTSS2SDZrm:
387 case X86::CVTSD2SSrm:
388 case X86::VCVTSD2SSrm:
389 case X86::VCVTSD2SSZrm:
391 case X86::VCVTTSD2USI64Zrm:
392 case X86::VCVTTSD2USIZrm:
393 case X86::VCVTTSS2USI64Zrm:
394 case X86::VCVTTSS2USIZrm:
395 case X86::VCVTUSI2SDZrm:
396 case X86::VCVTUSI642SDZrm:
397 case X86::VCVTUSI2SSZrm:
398 case X86::VCVTUSI642SSZrm:
402 case X86::MOV8rm_NOREX:
406 case X86::MOVSX16rm8:
407 case X86::MOVSX32rm16:
408 case X86::MOVSX32rm8:
409 case X86::MOVSX32rm8_NOREX:
410 case X86::MOVSX64rm16:
411 case X86::MOVSX64rm32:
412 case X86::MOVSX64rm8:
413 case X86::MOVZX16rm8:
414 case X86::MOVZX32rm16:
415 case X86::MOVZX32rm8:
416 case X86::MOVZX32rm8_NOREX:
417 case X86::MOVZX64rm16:
418 case X86::MOVZX64rm8:
427 if (isFrameInstr(
MI)) {
430 if (!isFrameSetup(
MI))
441 for (
auto E =
MBB->
end();
I != E; ++
I) {
442 if (
I->getOpcode() == getCallFrameDestroyOpcode() ||
I->isCall())
448 if (
I->getOpcode() != getCallFrameDestroyOpcode())
451 return -(
I->getOperand(1).getImm());
456 switch (
MI.getOpcode()) {
475 int &FrameIndex)
const {
495 case X86::KMOVBkm_EVEX:
500 case X86::KMOVWkm_EVEX:
502 case X86::VMOVSHZrm_alt:
507 case X86::MOVSSrm_alt:
509 case X86::VMOVSSrm_alt:
511 case X86::VMOVSSZrm_alt:
513 case X86::KMOVDkm_EVEX:
519 case X86::MOVSDrm_alt:
521 case X86::VMOVSDrm_alt:
523 case X86::VMOVSDZrm_alt:
524 case X86::MMX_MOVD64rm:
525 case X86::MMX_MOVQ64rm:
527 case X86::KMOVQkm_EVEX:
542 case X86::VMOVAPSZ128rm:
543 case X86::VMOVUPSZ128rm:
544 case X86::VMOVAPSZ128rm_NOVLX:
545 case X86::VMOVUPSZ128rm_NOVLX:
546 case X86::VMOVAPDZ128rm:
547 case X86::VMOVUPDZ128rm:
548 case X86::VMOVDQU8Z128rm:
549 case X86::VMOVDQU16Z128rm:
550 case X86::VMOVDQA32Z128rm:
551 case X86::VMOVDQU32Z128rm:
552 case X86::VMOVDQA64Z128rm:
553 case X86::VMOVDQU64Z128rm:
556 case X86::VMOVAPSYrm:
557 case X86::VMOVUPSYrm:
558 case X86::VMOVAPDYrm:
559 case X86::VMOVUPDYrm:
560 case X86::VMOVDQAYrm:
561 case X86::VMOVDQUYrm:
562 case X86::VMOVAPSZ256rm:
563 case X86::VMOVUPSZ256rm:
564 case X86::VMOVAPSZ256rm_NOVLX:
565 case X86::VMOVUPSZ256rm_NOVLX:
566 case X86::VMOVAPDZ256rm:
567 case X86::VMOVUPDZ256rm:
568 case X86::VMOVDQU8Z256rm:
569 case X86::VMOVDQU16Z256rm:
570 case X86::VMOVDQA32Z256rm:
571 case X86::VMOVDQU32Z256rm:
572 case X86::VMOVDQA64Z256rm:
573 case X86::VMOVDQU64Z256rm:
576 case X86::VMOVAPSZrm:
577 case X86::VMOVUPSZrm:
578 case X86::VMOVAPDZrm:
579 case X86::VMOVUPDZrm:
580 case X86::VMOVDQU8Zrm:
581 case X86::VMOVDQU16Zrm:
582 case X86::VMOVDQA32Zrm:
583 case X86::VMOVDQU32Zrm:
584 case X86::VMOVDQA64Zrm:
585 case X86::VMOVDQU64Zrm:
597 case X86::KMOVBmk_EVEX:
602 case X86::KMOVWmk_EVEX:
611 case X86::KMOVDmk_EVEX:
619 case X86::MMX_MOVD64mr:
620 case X86::MMX_MOVQ64mr:
621 case X86::MMX_MOVNTQmr:
623 case X86::KMOVQmk_EVEX:
638 case X86::VMOVUPSZ128mr:
639 case X86::VMOVAPSZ128mr:
640 case X86::VMOVUPSZ128mr_NOVLX:
641 case X86::VMOVAPSZ128mr_NOVLX:
642 case X86::VMOVUPDZ128mr:
643 case X86::VMOVAPDZ128mr:
644 case X86::VMOVDQA32Z128mr:
645 case X86::VMOVDQU32Z128mr:
646 case X86::VMOVDQA64Z128mr:
647 case X86::VMOVDQU64Z128mr:
648 case X86::VMOVDQU8Z128mr:
649 case X86::VMOVDQU16Z128mr:
652 case X86::VMOVUPSYmr:
653 case X86::VMOVAPSYmr:
654 case X86::VMOVUPDYmr:
655 case X86::VMOVAPDYmr:
656 case X86::VMOVDQUYmr:
657 case X86::VMOVDQAYmr:
658 case X86::VMOVUPSZ256mr:
659 case X86::VMOVAPSZ256mr:
660 case X86::VMOVUPSZ256mr_NOVLX:
661 case X86::VMOVAPSZ256mr_NOVLX:
662 case X86::VMOVUPDZ256mr:
663 case X86::VMOVAPDZ256mr:
664 case X86::VMOVDQU8Z256mr:
665 case X86::VMOVDQU16Z256mr:
666 case X86::VMOVDQA32Z256mr:
667 case X86::VMOVDQU32Z256mr:
668 case X86::VMOVDQA64Z256mr:
669 case X86::VMOVDQU64Z256mr:
672 case X86::VMOVUPSZmr:
673 case X86::VMOVAPSZmr:
674 case X86::VMOVUPDZmr:
675 case X86::VMOVAPDZmr:
676 case X86::VMOVDQU8Zmr:
677 case X86::VMOVDQU16Zmr:
678 case X86::VMOVDQA32Zmr:
679 case X86::VMOVDQU32Zmr:
680 case X86::VMOVDQA64Zmr:
681 case X86::VMOVDQU64Zmr:
689 int &FrameIndex)
const {
696 unsigned &MemBytes)
const {
698 if (
MI.getOperand(0).getSubReg() == 0 && isFrameOperand(
MI, 1, FrameIndex))
699 return MI.getOperand(0).getReg();
704 int &FrameIndex)
const {
712 if (hasLoadFromStackSlot(
MI, Accesses)) {
714 cast<FixedStackPseudoSourceValue>(Accesses.
front()->getPseudoValue())
716 return MI.getOperand(0).getReg();
723 int &FrameIndex)
const {
730 unsigned &MemBytes)
const {
733 isFrameOperand(
MI, 0, FrameIndex))
739 int &FrameIndex)
const {
747 if (hasStoreToStackSlot(
MI, Accesses)) {
749 cast<FixedStackPseudoSourceValue>(Accesses.
front()->getPseudoValue())
762 bool isPICBase =
false;
764 if (
DefMI.getOpcode() != X86::MOVPC32r)
766 assert(!isPICBase &&
"More than one PIC base?");
774 switch (
MI.getOpcode()) {
780 case X86::IMPLICIT_DEF:
783 case X86::LOAD_STACK_GUARD:
790 case X86::AVX1_SETALLONES:
791 case X86::AVX2_SETALLONES:
792 case X86::AVX512_128_SET0:
793 case X86::AVX512_256_SET0:
794 case X86::AVX512_512_SET0:
795 case X86::AVX512_512_SETALLONES:
796 case X86::AVX512_FsFLD0SD:
797 case X86::AVX512_FsFLD0SH:
798 case X86::AVX512_FsFLD0SS:
799 case X86::AVX512_FsFLD0F128:
804 case X86::FsFLD0F128:
812 case X86::MOV32ImmSExti8:
817 case X86::MOV64ImmSExti8:
819 case X86::V_SETALLONES:
825 case X86::PTILEZEROV:
829 case X86::MOV8rm_NOREX:
834 case X86::MOVSSrm_alt:
836 case X86::MOVSDrm_alt:
844 case X86::VMOVSSrm_alt:
846 case X86::VMOVSDrm_alt:
853 case X86::VMOVAPSYrm:
854 case X86::VMOVUPSYrm:
855 case X86::VMOVAPDYrm:
856 case X86::VMOVUPDYrm:
857 case X86::VMOVDQAYrm:
858 case X86::VMOVDQUYrm:
859 case X86::MMX_MOVD64rm:
860 case X86::MMX_MOVQ64rm:
861 case X86::VBROADCASTSSrm:
862 case X86::VBROADCASTSSYrm:
863 case X86::VBROADCASTSDYrm:
865 case X86::VPBROADCASTBZ128rm:
866 case X86::VPBROADCASTBZ256rm:
867 case X86::VPBROADCASTBZrm:
868 case X86::VBROADCASTF32X2Z256rm:
869 case X86::VBROADCASTF32X2Zrm:
870 case X86::VBROADCASTI32X2Z128rm:
871 case X86::VBROADCASTI32X2Z256rm:
872 case X86::VBROADCASTI32X2Zrm:
873 case X86::VPBROADCASTWZ128rm:
874 case X86::VPBROADCASTWZ256rm:
875 case X86::VPBROADCASTWZrm:
876 case X86::VPBROADCASTDZ128rm:
877 case X86::VPBROADCASTDZ256rm:
878 case X86::VPBROADCASTDZrm:
879 case X86::VBROADCASTSSZ128rm:
880 case X86::VBROADCASTSSZ256rm:
881 case X86::VBROADCASTSSZrm:
882 case X86::VPBROADCASTQZ128rm:
883 case X86::VPBROADCASTQZ256rm:
884 case X86::VPBROADCASTQZrm:
885 case X86::VBROADCASTSDZ256rm:
886 case X86::VBROADCASTSDZrm:
888 case X86::VMOVSSZrm_alt:
890 case X86::VMOVSDZrm_alt:
892 case X86::VMOVSHZrm_alt:
893 case X86::VMOVAPDZ128rm:
894 case X86::VMOVAPDZ256rm:
895 case X86::VMOVAPDZrm:
896 case X86::VMOVAPSZ128rm:
897 case X86::VMOVAPSZ256rm:
898 case X86::VMOVAPSZ128rm_NOVLX:
899 case X86::VMOVAPSZ256rm_NOVLX:
900 case X86::VMOVAPSZrm:
901 case X86::VMOVDQA32Z128rm:
902 case X86::VMOVDQA32Z256rm:
903 case X86::VMOVDQA32Zrm:
904 case X86::VMOVDQA64Z128rm:
905 case X86::VMOVDQA64Z256rm:
906 case X86::VMOVDQA64Zrm:
907 case X86::VMOVDQU16Z128rm:
908 case X86::VMOVDQU16Z256rm:
909 case X86::VMOVDQU16Zrm:
910 case X86::VMOVDQU32Z128rm:
911 case X86::VMOVDQU32Z256rm:
912 case X86::VMOVDQU32Zrm:
913 case X86::VMOVDQU64Z128rm:
914 case X86::VMOVDQU64Z256rm:
915 case X86::VMOVDQU64Zrm:
916 case X86::VMOVDQU8Z128rm:
917 case X86::VMOVDQU8Z256rm:
918 case X86::VMOVDQU8Zrm:
919 case X86::VMOVUPDZ128rm:
920 case X86::VMOVUPDZ256rm:
921 case X86::VMOVUPDZrm:
922 case X86::VMOVUPSZ128rm:
923 case X86::VMOVUPSZ256rm:
924 case X86::VMOVUPSZ128rm_NOVLX:
925 case X86::VMOVUPSZ256rm_NOVLX:
926 case X86::VMOVUPSZrm: {
932 MI.isDereferenceableInvariantLoad()) {
934 if (BaseReg == 0 || BaseReg == X86::RIP)
1012 if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS &&
1022 unsigned ShiftAmtOperandIdx) {
1024 unsigned ShiftCountMask = (
MI.getDesc().TSFlags &
X86II::REX_W) ? 63 : 31;
1025 unsigned Imm =
MI.getOperand(ShiftAmtOperandIdx).getImm();
1026 return Imm & ShiftCountMask;
1037 return ShAmt < 4 && ShAmt > 0;
1045 bool &NoSignFlag,
bool &ClearsOverflowFlag) {
1046 if (!(CmpValDefInstr.
getOpcode() == X86::SUBREG_TO_REG &&
1047 CmpInstr.
getOpcode() == X86::TEST64rr) &&
1048 !(CmpValDefInstr.
getOpcode() == X86::COPY &&
1056 "CmpInstr is an analyzable TEST16rr/TEST64rr, and "
1057 "`X86InstrInfo::analyzeCompare` requires two reg operands are the"
1066 "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
1067 "is a user of COPY sub16bit.");
1069 if (CmpInstr.
getOpcode() == X86::TEST16rr) {
1078 if (!((VregDefInstr->
getOpcode() == X86::AND32ri ||
1079 VregDefInstr->
getOpcode() == X86::AND64ri32) &&
1084 if (CmpInstr.
getOpcode() == X86::TEST64rr) {
1098 assert(VregDefInstr &&
"Must have a definition (SSA)");
1108 if (X86::isAND(VregDefInstr->
getOpcode())) {
1128 if (Instr.modifiesRegister(X86::EFLAGS,
TRI))
1132 *AndInstr = VregDefInstr;
1153 ClearsOverflowFlag =
true;
1160 unsigned Opc,
bool AllowSP,
Register &NewSrc,
1166 RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
1168 RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
1171 isKill =
MI.killsRegister(SrcReg);
1175 if (Opc != X86::LEA64_32r) {
1177 assert(!Src.isUndef() &&
"Undef op doesn't need optimization");
1193 assert(!Src.isUndef() &&
"Undef op doesn't need optimization");
1223MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned MIOpc,
1227 bool Is8BitOp)
const {
1234 "Unexpected type for LEA transform");
1243 if (!Subtarget.is64Bit())
1246 unsigned Opcode = X86::LEA64_32r;
1262 bool IsDead =
MI.getOperand(0).isDead();
1263 bool IsKill =
MI.getOperand(1).isKill();
1264 unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
1265 assert(!
MI.getOperand(1).isUndef() &&
"Undef op doesn't need optimization");
1281 case X86::SHL16ri: {
1282 unsigned ShAmt =
MI.getOperand(2).getImm();
1299 case X86::ADD8ri_DB:
1301 case X86::ADD16ri_DB:
1305 case X86::ADD8rr_DB:
1307 case X86::ADD16rr_DB: {
1308 Src2 =
MI.getOperand(2).getReg();
1309 bool IsKill2 =
MI.getOperand(2).isKill();
1310 assert(!
MI.getOperand(2).isUndef() &&
"Undef op doesn't need optimization");
1314 addRegReg(MIB, InRegLEA,
true, InRegLEA,
false);
1316 if (Subtarget.is64Bit())
1317 InRegLEA2 =
RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1319 InRegLEA2 =
RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1322 ImpDef2 =
BuildMI(
MBB, &*MIB,
MI.getDebugLoc(),
get(X86::IMPLICIT_DEF),
1324 InsMI2 =
BuildMI(
MBB, &*MIB,
MI.getDebugLoc(),
get(TargetOpcode::COPY))
1327 addRegReg(MIB, InRegLEA,
true, InRegLEA2,
true);
1329 if (LV && IsKill2 && InsMI2)
1425 if (
MI.getNumOperands() > 2)
1426 if (
MI.getOperand(2).isReg() &&
MI.getOperand(2).isUndef())
1431 bool Is64Bit = Subtarget.is64Bit();
1433 bool Is8BitOp =
false;
1434 unsigned NumRegOperands = 2;
1435 unsigned MIOpc =
MI.getOpcode();
1439 case X86::SHL64ri: {
1440 assert(
MI.getNumOperands() >= 3 &&
"Unknown shift instruction!");
1447 Src.getReg(), &X86::GR64_NOSPRegClass))
1450 NewMI =
BuildMI(MF,
MI.getDebugLoc(),
get(X86::LEA64r))
1459 case X86::SHL32ri: {
1460 assert(
MI.getNumOperands() >= 3 &&
"Unknown shift instruction!");
1465 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1471 ImplicitOp, LV, LIS))
1481 if (ImplicitOp.
getReg() != 0)
1482 MIB.
add(ImplicitOp);
1486 if (LV && SrcReg != Src.getReg())
1493 case X86::SHL16ri: {
1494 assert(
MI.getNumOperands() >= 3 &&
"Unknown shift instruction!");
1498 return convertToThreeAddressWithLEA(MIOpc,
MI, LV, LIS, Is8BitOp);
1502 assert(
MI.getNumOperands() >= 2 &&
"Unknown inc instruction!");
1503 unsigned Opc = MIOpc == X86::INC64r
1505 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1509 ImplicitOp, LV, LIS))
1515 if (ImplicitOp.
getReg() != 0)
1516 MIB.
add(ImplicitOp);
1521 if (LV && SrcReg != Src.getReg())
1527 assert(
MI.getNumOperands() >= 2 &&
"Unknown dec instruction!");
1528 unsigned Opc = MIOpc == X86::DEC64r
1530 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1535 ImplicitOp, LV, LIS))
1541 if (ImplicitOp.
getReg() != 0)
1542 MIB.
add(ImplicitOp);
1547 if (LV && SrcReg != Src.getReg())
1557 return convertToThreeAddressWithLEA(MIOpc,
MI, LV, LIS, Is8BitOp);
1559 case X86::ADD64rr_DB:
1561 case X86::ADD32rr_DB: {
1562 assert(
MI.getNumOperands() >= 3 &&
"Unknown add instruction!");
1564 if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
1567 Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1573 ImplicitOp2, LV, LIS))
1578 if (Src.getReg() == Src2.
getReg()) {
1585 ImplicitOp, LV, LIS))
1590 if (ImplicitOp.
getReg() != 0)
1591 MIB.
add(ImplicitOp);
1592 if (ImplicitOp2.
getReg() != 0)
1593 MIB.
add(ImplicitOp2);
1595 NewMI =
addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
1599 if (SrcReg2 != Src2.
getReg())
1601 if (SrcReg != SrcReg2 && SrcReg != Src.getReg())
1608 case X86::ADD8rr_DB:
1612 case X86::ADD16rr_DB:
1613 return convertToThreeAddressWithLEA(MIOpc,
MI, LV, LIS, Is8BitOp);
1614 case X86::ADD64ri32:
1615 case X86::ADD64ri32_DB:
1616 assert(
MI.getNumOperands() >= 3 &&
"Unknown add instruction!");
1618 BuildMI(MF,
MI.getDebugLoc(),
get(X86::LEA64r)).add(Dest).add(Src),
1622 case X86::ADD32ri_DB: {
1623 assert(
MI.getNumOperands() >= 3 &&
"Unknown add instruction!");
1624 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1629 ImplicitOp, LV, LIS))
1635 if (ImplicitOp.
getReg() != 0)
1636 MIB.
add(ImplicitOp);
1641 if (LV && SrcReg != Src.getReg())
1646 case X86::ADD8ri_DB:
1650 case X86::ADD16ri_DB:
1651 return convertToThreeAddressWithLEA(MIOpc,
MI, LV, LIS, Is8BitOp);
1656 case X86::SUB32ri: {
1657 if (!
MI.getOperand(2).isImm())
1659 int64_t Imm =
MI.getOperand(2).getImm();
1660 if (!isInt<32>(-Imm))
1663 assert(
MI.getNumOperands() >= 3 &&
"Unknown add instruction!");
1664 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1669 ImplicitOp, LV, LIS))
1675 if (ImplicitOp.
getReg() != 0)
1676 MIB.
add(ImplicitOp);
1681 if (LV && SrcReg != Src.getReg())
1686 case X86::SUB64ri32: {
1687 if (!
MI.getOperand(2).isImm())
1689 int64_t Imm =
MI.getOperand(2).getImm();
1690 if (!isInt<32>(-Imm))
1693 assert(
MI.getNumOperands() >= 3 &&
"Unknown sub instruction!");
1701 case X86::VMOVDQU8Z128rmk:
1702 case X86::VMOVDQU8Z256rmk:
1703 case X86::VMOVDQU8Zrmk:
1704 case X86::VMOVDQU16Z128rmk:
1705 case X86::VMOVDQU16Z256rmk:
1706 case X86::VMOVDQU16Zrmk:
1707 case X86::VMOVDQU32Z128rmk:
1708 case X86::VMOVDQA32Z128rmk:
1709 case X86::VMOVDQU32Z256rmk:
1710 case X86::VMOVDQA32Z256rmk:
1711 case X86::VMOVDQU32Zrmk:
1712 case X86::VMOVDQA32Zrmk:
1713 case X86::VMOVDQU64Z128rmk:
1714 case X86::VMOVDQA64Z128rmk:
1715 case X86::VMOVDQU64Z256rmk:
1716 case X86::VMOVDQA64Z256rmk:
1717 case X86::VMOVDQU64Zrmk:
1718 case X86::VMOVDQA64Zrmk:
1719 case X86::VMOVUPDZ128rmk:
1720 case X86::VMOVAPDZ128rmk:
1721 case X86::VMOVUPDZ256rmk:
1722 case X86::VMOVAPDZ256rmk:
1723 case X86::VMOVUPDZrmk:
1724 case X86::VMOVAPDZrmk:
1725 case X86::VMOVUPSZ128rmk:
1726 case X86::VMOVAPSZ128rmk:
1727 case X86::VMOVUPSZ256rmk:
1728 case X86::VMOVAPSZ256rmk:
1729 case X86::VMOVUPSZrmk:
1730 case X86::VMOVAPSZrmk:
1731 case X86::VBROADCASTSDZ256rmk:
1732 case X86::VBROADCASTSDZrmk:
1733 case X86::VBROADCASTSSZ128rmk:
1734 case X86::VBROADCASTSSZ256rmk:
1735 case X86::VBROADCASTSSZrmk:
1736 case X86::VPBROADCASTDZ128rmk:
1737 case X86::VPBROADCASTDZ256rmk:
1738 case X86::VPBROADCASTDZrmk:
1739 case X86::VPBROADCASTQZ128rmk:
1740 case X86::VPBROADCASTQZ256rmk:
1741 case X86::VPBROADCASTQZrmk: {
1746 case X86::VMOVDQU8Z128rmk:
1747 Opc = X86::VPBLENDMBZ128rmk;
1749 case X86::VMOVDQU8Z256rmk:
1750 Opc = X86::VPBLENDMBZ256rmk;
1752 case X86::VMOVDQU8Zrmk:
1753 Opc = X86::VPBLENDMBZrmk;
1755 case X86::VMOVDQU16Z128rmk:
1756 Opc = X86::VPBLENDMWZ128rmk;
1758 case X86::VMOVDQU16Z256rmk:
1759 Opc = X86::VPBLENDMWZ256rmk;
1761 case X86::VMOVDQU16Zrmk:
1762 Opc = X86::VPBLENDMWZrmk;
1764 case X86::VMOVDQU32Z128rmk:
1765 Opc = X86::VPBLENDMDZ128rmk;
1767 case X86::VMOVDQU32Z256rmk:
1768 Opc = X86::VPBLENDMDZ256rmk;
1770 case X86::VMOVDQU32Zrmk:
1771 Opc = X86::VPBLENDMDZrmk;
1773 case X86::VMOVDQU64Z128rmk:
1774 Opc = X86::VPBLENDMQZ128rmk;
1776 case X86::VMOVDQU64Z256rmk:
1777 Opc = X86::VPBLENDMQZ256rmk;
1779 case X86::VMOVDQU64Zrmk:
1780 Opc = X86::VPBLENDMQZrmk;
1782 case X86::VMOVUPDZ128rmk:
1783 Opc = X86::VBLENDMPDZ128rmk;
1785 case X86::VMOVUPDZ256rmk:
1786 Opc = X86::VBLENDMPDZ256rmk;
1788 case X86::VMOVUPDZrmk:
1789 Opc = X86::VBLENDMPDZrmk;
1791 case X86::VMOVUPSZ128rmk:
1792 Opc = X86::VBLENDMPSZ128rmk;
1794 case X86::VMOVUPSZ256rmk:
1795 Opc = X86::VBLENDMPSZ256rmk;
1797 case X86::VMOVUPSZrmk:
1798 Opc = X86::VBLENDMPSZrmk;
1800 case X86::VMOVDQA32Z128rmk:
1801 Opc = X86::VPBLENDMDZ128rmk;
1803 case X86::VMOVDQA32Z256rmk:
1804 Opc = X86::VPBLENDMDZ256rmk;
1806 case X86::VMOVDQA32Zrmk:
1807 Opc = X86::VPBLENDMDZrmk;
1809 case X86::VMOVDQA64Z128rmk:
1810 Opc = X86::VPBLENDMQZ128rmk;
1812 case X86::VMOVDQA64Z256rmk:
1813 Opc = X86::VPBLENDMQZ256rmk;
1815 case X86::VMOVDQA64Zrmk:
1816 Opc = X86::VPBLENDMQZrmk;
1818 case X86::VMOVAPDZ128rmk:
1819 Opc = X86::VBLENDMPDZ128rmk;
1821 case X86::VMOVAPDZ256rmk:
1822 Opc = X86::VBLENDMPDZ256rmk;
1824 case X86::VMOVAPDZrmk:
1825 Opc = X86::VBLENDMPDZrmk;
1827 case X86::VMOVAPSZ128rmk:
1828 Opc = X86::VBLENDMPSZ128rmk;
1830 case X86::VMOVAPSZ256rmk:
1831 Opc = X86::VBLENDMPSZ256rmk;
1833 case X86::VMOVAPSZrmk:
1834 Opc = X86::VBLENDMPSZrmk;
1836 case X86::VBROADCASTSDZ256rmk:
1837 Opc = X86::VBLENDMPDZ256rmbk;
1839 case X86::VBROADCASTSDZrmk:
1840 Opc = X86::VBLENDMPDZrmbk;
1842 case X86::VBROADCASTSSZ128rmk:
1843 Opc = X86::VBLENDMPSZ128rmbk;
1845 case X86::VBROADCASTSSZ256rmk:
1846 Opc = X86::VBLENDMPSZ256rmbk;
1848 case X86::VBROADCASTSSZrmk:
1849 Opc = X86::VBLENDMPSZrmbk;
1851 case X86::VPBROADCASTDZ128rmk:
1852 Opc = X86::VPBLENDMDZ128rmbk;
1854 case X86::VPBROADCASTDZ256rmk:
1855 Opc = X86::VPBLENDMDZ256rmbk;
1857 case X86::VPBROADCASTDZrmk:
1858 Opc = X86::VPBLENDMDZrmbk;
1860 case X86::VPBROADCASTQZ128rmk:
1861 Opc = X86::VPBLENDMQZ128rmbk;
1863 case X86::VPBROADCASTQZ256rmk:
1864 Opc = X86::VPBLENDMQZ256rmbk;
1866 case X86::VPBROADCASTQZrmk:
1867 Opc = X86::VPBLENDMQZrmbk;
1873 .
add(
MI.getOperand(2))
1875 .
add(
MI.getOperand(3))
1876 .
add(
MI.getOperand(4))
1877 .
add(
MI.getOperand(5))
1878 .
add(
MI.getOperand(6))
1879 .
add(
MI.getOperand(7));
1884 case X86::VMOVDQU8Z128rrk:
1885 case X86::VMOVDQU8Z256rrk:
1886 case X86::VMOVDQU8Zrrk:
1887 case X86::VMOVDQU16Z128rrk:
1888 case X86::VMOVDQU16Z256rrk:
1889 case X86::VMOVDQU16Zrrk:
1890 case X86::VMOVDQU32Z128rrk:
1891 case X86::VMOVDQA32Z128rrk:
1892 case X86::VMOVDQU32Z256rrk:
1893 case X86::VMOVDQA32Z256rrk:
1894 case X86::VMOVDQU32Zrrk:
1895 case X86::VMOVDQA32Zrrk:
1896 case X86::VMOVDQU64Z128rrk:
1897 case X86::VMOVDQA64Z128rrk:
1898 case X86::VMOVDQU64Z256rrk:
1899 case X86::VMOVDQA64Z256rrk:
1900 case X86::VMOVDQU64Zrrk:
1901 case X86::VMOVDQA64Zrrk:
1902 case X86::VMOVUPDZ128rrk:
1903 case X86::VMOVAPDZ128rrk:
1904 case X86::VMOVUPDZ256rrk:
1905 case X86::VMOVAPDZ256rrk:
1906 case X86::VMOVUPDZrrk:
1907 case X86::VMOVAPDZrrk:
1908 case X86::VMOVUPSZ128rrk:
1909 case X86::VMOVAPSZ128rrk:
1910 case X86::VMOVUPSZ256rrk:
1911 case X86::VMOVAPSZ256rrk:
1912 case X86::VMOVUPSZrrk:
1913 case X86::VMOVAPSZrrk: {
1918 case X86::VMOVDQU8Z128rrk:
1919 Opc = X86::VPBLENDMBZ128rrk;
1921 case X86::VMOVDQU8Z256rrk:
1922 Opc = X86::VPBLENDMBZ256rrk;
1924 case X86::VMOVDQU8Zrrk:
1925 Opc = X86::VPBLENDMBZrrk;
1927 case X86::VMOVDQU16Z128rrk:
1928 Opc = X86::VPBLENDMWZ128rrk;
1930 case X86::VMOVDQU16Z256rrk:
1931 Opc = X86::VPBLENDMWZ256rrk;
1933 case X86::VMOVDQU16Zrrk:
1934 Opc = X86::VPBLENDMWZrrk;
1936 case X86::VMOVDQU32Z128rrk:
1937 Opc = X86::VPBLENDMDZ128rrk;
1939 case X86::VMOVDQU32Z256rrk:
1940 Opc = X86::VPBLENDMDZ256rrk;
1942 case X86::VMOVDQU32Zrrk:
1943 Opc = X86::VPBLENDMDZrrk;
1945 case X86::VMOVDQU64Z128rrk:
1946 Opc = X86::VPBLENDMQZ128rrk;
1948 case X86::VMOVDQU64Z256rrk:
1949 Opc = X86::VPBLENDMQZ256rrk;
1951 case X86::VMOVDQU64Zrrk:
1952 Opc = X86::VPBLENDMQZrrk;
1954 case X86::VMOVUPDZ128rrk:
1955 Opc = X86::VBLENDMPDZ128rrk;
1957 case X86::VMOVUPDZ256rrk:
1958 Opc = X86::VBLENDMPDZ256rrk;
1960 case X86::VMOVUPDZrrk:
1961 Opc = X86::VBLENDMPDZrrk;
1963 case X86::VMOVUPSZ128rrk:
1964 Opc = X86::VBLENDMPSZ128rrk;
1966 case X86::VMOVUPSZ256rrk:
1967 Opc = X86::VBLENDMPSZ256rrk;
1969 case X86::VMOVUPSZrrk:
1970 Opc = X86::VBLENDMPSZrrk;
1972 case X86::VMOVDQA32Z128rrk:
1973 Opc = X86::VPBLENDMDZ128rrk;
1975 case X86::VMOVDQA32Z256rrk:
1976 Opc = X86::VPBLENDMDZ256rrk;
1978 case X86::VMOVDQA32Zrrk:
1979 Opc = X86::VPBLENDMDZrrk;
1981 case X86::VMOVDQA64Z128rrk:
1982 Opc = X86::VPBLENDMQZ128rrk;
1984 case X86::VMOVDQA64Z256rrk:
1985 Opc = X86::VPBLENDMQZ256rrk;
1987 case X86::VMOVDQA64Zrrk:
1988 Opc = X86::VPBLENDMQZrrk;
1990 case X86::VMOVAPDZ128rrk:
1991 Opc = X86::VBLENDMPDZ128rrk;
1993 case X86::VMOVAPDZ256rrk:
1994 Opc = X86::VBLENDMPDZ256rrk;
1996 case X86::VMOVAPDZrrk:
1997 Opc = X86::VBLENDMPDZrrk;
1999 case X86::VMOVAPSZ128rrk:
2000 Opc = X86::VBLENDMPSZ128rrk;
2002 case X86::VMOVAPSZ256rrk:
2003 Opc = X86::VBLENDMPSZ256rrk;
2005 case X86::VMOVAPSZrrk:
2006 Opc = X86::VBLENDMPSZrrk;
2012 .
add(
MI.getOperand(2))
2014 .
add(
MI.getOperand(3));
2024 for (
unsigned I = 0;
I < NumRegOperands; ++
I) {
2026 if (
Op.isReg() && (
Op.isDead() ||
Op.isKill()))
2053 unsigned SrcOpIdx2) {
2055 if (SrcOpIdx1 > SrcOpIdx2)
2058 unsigned Op1 = 1, Op2 = 2, Op3 = 3;
2064 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
2066 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
2068 if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
2077 unsigned Opc =
MI.getOpcode();
2086 "Intrinsic instructions can't commute operand 1");
2091 assert(Case < 3 &&
"Unexpected case number!");
2096 const unsigned Form132Index = 0;
2097 const unsigned Form213Index = 1;
2098 const unsigned Form231Index = 2;
2099 static const unsigned FormMapping[][3] = {
2104 {Form231Index, Form213Index, Form132Index},
2109 {Form132Index, Form231Index, Form213Index},
2114 {Form213Index, Form132Index, Form231Index}};
2116 unsigned FMAForms[3];
2122 for (
unsigned FormIndex = 0; FormIndex < 3; FormIndex++)
2123 if (Opc == FMAForms[FormIndex])
2124 return FMAForms[FormMapping[Case][FormIndex]];
2130 unsigned SrcOpIdx2) {
2134 assert(Case < 3 &&
"Unexpected case value!");
2137 static const uint8_t SwapMasks[3][4] = {
2138 {0x04, 0x10, 0x08, 0x20},
2139 {0x02, 0x10, 0x08, 0x40},
2140 {0x02, 0x04, 0x20, 0x40},
2143 uint8_t Imm =
MI.getOperand(
MI.getNumOperands() - 1).getImm();
2145 uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
2146 SwapMasks[Case][2] | SwapMasks[Case][3]);
2148 if (Imm & SwapMasks[Case][0])
2149 NewImm |= SwapMasks[Case][1];
2150 if (Imm & SwapMasks[Case][1])
2151 NewImm |= SwapMasks[Case][0];
2152 if (Imm & SwapMasks[Case][2])
2153 NewImm |= SwapMasks[Case][3];
2154 if (Imm & SwapMasks[Case][3])
2155 NewImm |= SwapMasks[Case][2];
2156 MI.getOperand(
MI.getNumOperands() - 1).setImm(NewImm);
2162#define VPERM_CASES(Suffix) \
2163 case X86::VPERMI2##Suffix##Z128rr: \
2164 case X86::VPERMT2##Suffix##Z128rr: \
2165 case X86::VPERMI2##Suffix##Z256rr: \
2166 case X86::VPERMT2##Suffix##Z256rr: \
2167 case X86::VPERMI2##Suffix##Zrr: \
2168 case X86::VPERMT2##Suffix##Zrr: \
2169 case X86::VPERMI2##Suffix##Z128rm: \
2170 case X86::VPERMT2##Suffix##Z128rm: \
2171 case X86::VPERMI2##Suffix##Z256rm: \
2172 case X86::VPERMT2##Suffix##Z256rm: \
2173 case X86::VPERMI2##Suffix##Zrm: \
2174 case X86::VPERMT2##Suffix##Zrm: \
2175 case X86::VPERMI2##Suffix##Z128rrkz: \
2176 case X86::VPERMT2##Suffix##Z128rrkz: \
2177 case X86::VPERMI2##Suffix##Z256rrkz: \
2178 case X86::VPERMT2##Suffix##Z256rrkz: \
2179 case X86::VPERMI2##Suffix##Zrrkz: \
2180 case X86::VPERMT2##Suffix##Zrrkz: \
2181 case X86::VPERMI2##Suffix##Z128rmkz: \
2182 case X86::VPERMT2##Suffix##Z128rmkz: \
2183 case X86::VPERMI2##Suffix##Z256rmkz: \
2184 case X86::VPERMT2##Suffix##Z256rmkz: \
2185 case X86::VPERMI2##Suffix##Zrmkz: \
2186 case X86::VPERMT2##Suffix##Zrmkz:
2188#define VPERM_CASES_BROADCAST(Suffix) \
2189 VPERM_CASES(Suffix) \
2190 case X86::VPERMI2##Suffix##Z128rmb: \
2191 case X86::VPERMT2##Suffix##Z128rmb: \
2192 case X86::VPERMI2##Suffix##Z256rmb: \
2193 case X86::VPERMT2##Suffix##Z256rmb: \
2194 case X86::VPERMI2##Suffix##Zrmb: \
2195 case X86::VPERMT2##Suffix##Zrmb: \
2196 case X86::VPERMI2##Suffix##Z128rmbkz: \
2197 case X86::VPERMT2##Suffix##Z128rmbkz: \
2198 case X86::VPERMI2##Suffix##Z256rmbkz: \
2199 case X86::VPERMT2##Suffix##Z256rmbkz: \
2200 case X86::VPERMI2##Suffix##Zrmbkz: \
2201 case X86::VPERMT2##Suffix##Zrmbkz:
2214#undef VPERM_CASES_BROADCAST
2221#define VPERM_CASES(Orig, New) \
2222 case X86::Orig##Z128rr: \
2223 return X86::New##Z128rr; \
2224 case X86::Orig##Z128rrkz: \
2225 return X86::New##Z128rrkz; \
2226 case X86::Orig##Z128rm: \
2227 return X86::New##Z128rm; \
2228 case X86::Orig##Z128rmkz: \
2229 return X86::New##Z128rmkz; \
2230 case X86::Orig##Z256rr: \
2231 return X86::New##Z256rr; \
2232 case X86::Orig##Z256rrkz: \
2233 return X86::New##Z256rrkz; \
2234 case X86::Orig##Z256rm: \
2235 return X86::New##Z256rm; \
2236 case X86::Orig##Z256rmkz: \
2237 return X86::New##Z256rmkz; \
2238 case X86::Orig##Zrr: \
2239 return X86::New##Zrr; \
2240 case X86::Orig##Zrrkz: \
2241 return X86::New##Zrrkz; \
2242 case X86::Orig##Zrm: \
2243 return X86::New##Zrm; \
2244 case X86::Orig##Zrmkz: \
2245 return X86::New##Zrmkz;
2247#define VPERM_CASES_BROADCAST(Orig, New) \
2248 VPERM_CASES(Orig, New) \
2249 case X86::Orig##Z128rmb: \
2250 return X86::New##Z128rmb; \
2251 case X86::Orig##Z128rmbkz: \
2252 return X86::New##Z128rmbkz; \
2253 case X86::Orig##Z256rmb: \
2254 return X86::New##Z256rmb; \
2255 case X86::Orig##Z256rmbkz: \
2256 return X86::New##Z256rmbkz; \
2257 case X86::Orig##Zrmb: \
2258 return X86::New##Zrmb; \
2259 case X86::Orig##Zrmbkz: \
2260 return X86::New##Zrmbkz;
2278#undef VPERM_CASES_BROADCAST
2284 unsigned OpIdx2)
const {
2286 return std::exchange(NewMI,
false)
2287 ?
MI.getParent()->getParent()->CloneMachineInstr(&
MI)
2291 unsigned Opc =
MI.getOpcode();
2293#define CASE_ND(OP) \
2309#define FROM_TO_SIZE(A, B, S) \
2315 Opc = X86::B##_ND; \
2323 Opc = X86::A##_ND; \
2332 WorkingMI = CloneIfNew(
MI);
2341 WorkingMI = CloneIfNew(
MI);
2343 get(X86::PFSUBRrr == Opc ? X86::PFSUBrr : X86::PFSUBRrr));
2345 case X86::BLENDPDrri:
2346 case X86::BLENDPSrri:
2347 case X86::VBLENDPDrri:
2348 case X86::VBLENDPSrri:
2350 if (
MI.getParent()->getParent()->getFunction().hasOptSize()) {
2351 unsigned Mask = (Opc == X86::BLENDPDrri || Opc == X86::VBLENDPDrri) ? 0x03: 0x0F;
2352 if ((
MI.getOperand(3).getImm() ^ Mask) == 1) {
2353#define FROM_TO(FROM, TO) \
2362 FROM_TO(VBLENDPDrri, VMOVSDrr)
2363 FROM_TO(VBLENDPSrri, VMOVSSrr)
2365 WorkingMI = CloneIfNew(
MI);
2373 case X86::PBLENDWrri:
2374 case X86::VBLENDPDYrri:
2375 case X86::VBLENDPSYrri:
2376 case X86::VPBLENDDrri:
2377 case X86::VPBLENDWrri:
2378 case X86::VPBLENDDYrri:
2379 case X86::VPBLENDWYrri: {
2384 case X86::BLENDPDrri:
2385 Mask = (int8_t)0x03;
2387 case X86::BLENDPSrri:
2388 Mask = (int8_t)0x0F;
2390 case X86::PBLENDWrri:
2391 Mask = (int8_t)0xFF;
2393 case X86::VBLENDPDrri:
2394 Mask = (int8_t)0x03;
2396 case X86::VBLENDPSrri:
2397 Mask = (int8_t)0x0F;
2399 case X86::VBLENDPDYrri:
2400 Mask = (int8_t)0x0F;
2402 case X86::VBLENDPSYrri:
2403 Mask = (int8_t)0xFF;
2405 case X86::VPBLENDDrri:
2406 Mask = (int8_t)0x0F;
2408 case X86::VPBLENDWrri:
2409 Mask = (int8_t)0xFF;
2411 case X86::VPBLENDDYrri:
2412 Mask = (int8_t)0xFF;
2414 case X86::VPBLENDWYrri:
2415 Mask = (int8_t)0xFF;
2421 int8_t Imm =
MI.getOperand(3).getImm() & Mask;
2422 WorkingMI = CloneIfNew(
MI);
2426 case X86::INSERTPSrr:
2427 case X86::VINSERTPSrr:
2428 case X86::VINSERTPSZrr: {
2429 unsigned Imm =
MI.getOperand(
MI.getNumOperands() - 1).getImm();
2430 unsigned ZMask = Imm & 15;
2431 unsigned DstIdx = (Imm >> 4) & 3;
2432 unsigned SrcIdx = (Imm >> 6) & 3;
2436 if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
2439 assert(AltIdx < 4 &&
"Illegal insertion index");
2440 unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
2441 WorkingMI = CloneIfNew(
MI);
2450 case X86::VMOVSSrr: {
2458 Opc = X86::BLENDPDrri;
2462 Opc = X86::BLENDPSrri;
2466 Opc = X86::VBLENDPDrri;
2470 Opc = X86::VBLENDPSrri;
2475 WorkingMI = CloneIfNew(
MI);
2481 WorkingMI = CloneIfNew(
MI);
2486 case X86::SHUFPDrri: {
2488 assert(
MI.getOperand(3).getImm() == 0x02 &&
"Unexpected immediate!");
2489 WorkingMI = CloneIfNew(
MI);
2494 case X86::PCLMULQDQrri:
2495 case X86::VPCLMULQDQrri:
2496 case X86::VPCLMULQDQYrri:
2497 case X86::VPCLMULQDQZrri:
2498 case X86::VPCLMULQDQZ128rri:
2499 case X86::VPCLMULQDQZ256rri: {
2502 unsigned Imm =
MI.getOperand(3).getImm();
2503 unsigned Src1Hi = Imm & 0x01;
2504 unsigned Src2Hi = Imm & 0x10;
2505 WorkingMI = CloneIfNew(
MI);
2509 case X86::VPCMPBZ128rri:
2510 case X86::VPCMPUBZ128rri:
2511 case X86::VPCMPBZ256rri:
2512 case X86::VPCMPUBZ256rri:
2513 case X86::VPCMPBZrri:
2514 case X86::VPCMPUBZrri:
2515 case X86::VPCMPDZ128rri:
2516 case X86::VPCMPUDZ128rri:
2517 case X86::VPCMPDZ256rri:
2518 case X86::VPCMPUDZ256rri:
2519 case X86::VPCMPDZrri:
2520 case X86::VPCMPUDZrri:
2521 case X86::VPCMPQZ128rri:
2522 case X86::VPCMPUQZ128rri:
2523 case X86::VPCMPQZ256rri:
2524 case X86::VPCMPUQZ256rri:
2525 case X86::VPCMPQZrri:
2526 case X86::VPCMPUQZrri:
2527 case X86::VPCMPWZ128rri:
2528 case X86::VPCMPUWZ128rri:
2529 case X86::VPCMPWZ256rri:
2530 case X86::VPCMPUWZ256rri:
2531 case X86::VPCMPWZrri:
2532 case X86::VPCMPUWZrri:
2533 case X86::VPCMPBZ128rrik:
2534 case X86::VPCMPUBZ128rrik:
2535 case X86::VPCMPBZ256rrik:
2536 case X86::VPCMPUBZ256rrik:
2537 case X86::VPCMPBZrrik:
2538 case X86::VPCMPUBZrrik:
2539 case X86::VPCMPDZ128rrik:
2540 case X86::VPCMPUDZ128rrik:
2541 case X86::VPCMPDZ256rrik:
2542 case X86::VPCMPUDZ256rrik:
2543 case X86::VPCMPDZrrik:
2544 case X86::VPCMPUDZrrik:
2545 case X86::VPCMPQZ128rrik:
2546 case X86::VPCMPUQZ128rrik:
2547 case X86::VPCMPQZ256rrik:
2548 case X86::VPCMPUQZ256rrik:
2549 case X86::VPCMPQZrrik:
2550 case X86::VPCMPUQZrrik:
2551 case X86::VPCMPWZ128rrik:
2552 case X86::VPCMPUWZ128rrik:
2553 case X86::VPCMPWZ256rrik:
2554 case X86::VPCMPUWZ256rrik:
2555 case X86::VPCMPWZrrik:
2556 case X86::VPCMPUWZrrik:
2557 WorkingMI = CloneIfNew(
MI);
2561 MI.getOperand(
MI.getNumOperands() - 1).getImm() & 0x7));
2564 case X86::VPCOMUBri:
2566 case X86::VPCOMUDri:
2568 case X86::VPCOMUQri:
2570 case X86::VPCOMUWri:
2571 WorkingMI = CloneIfNew(
MI);
2576 case X86::VCMPSDZrri:
2577 case X86::VCMPSSZrri:
2578 case X86::VCMPPDZrri:
2579 case X86::VCMPPSZrri:
2580 case X86::VCMPSHZrri:
2581 case X86::VCMPPHZrri:
2582 case X86::VCMPPHZ128rri:
2583 case X86::VCMPPHZ256rri:
2584 case X86::VCMPPDZ128rri:
2585 case X86::VCMPPSZ128rri:
2586 case X86::VCMPPDZ256rri:
2587 case X86::VCMPPSZ256rri:
2588 case X86::VCMPPDZrrik:
2589 case X86::VCMPPSZrrik:
2590 case X86::VCMPPDZ128rrik:
2591 case X86::VCMPPSZ128rrik:
2592 case X86::VCMPPDZ256rrik:
2593 case X86::VCMPPSZ256rrik:
2594 WorkingMI = CloneIfNew(
MI);
2597 MI.getOperand(
MI.getNumExplicitOperands() - 1).getImm() & 0x1f));
2599 case X86::VPERM2F128rr:
2600 case X86::VPERM2I128rr:
2604 WorkingMI = CloneIfNew(
MI);
2607 case X86::MOVHLPSrr:
2608 case X86::UNPCKHPDrr:
2609 case X86::VMOVHLPSrr:
2610 case X86::VUNPCKHPDrr:
2611 case X86::VMOVHLPSZrr:
2612 case X86::VUNPCKHPDZ128rr:
2613 assert(Subtarget.
hasSSE2() &&
"Commuting MOVHLP/UNPCKHPD requires SSE2!");
2618 case X86::MOVHLPSrr:
2619 Opc = X86::UNPCKHPDrr;
2621 case X86::UNPCKHPDrr:
2622 Opc = X86::MOVHLPSrr;
2624 case X86::VMOVHLPSrr:
2625 Opc = X86::VUNPCKHPDrr;
2627 case X86::VUNPCKHPDrr:
2628 Opc = X86::VMOVHLPSrr;
2630 case X86::VMOVHLPSZrr:
2631 Opc = X86::VUNPCKHPDZ128rr;
2633 case X86::VUNPCKHPDZ128rr:
2634 Opc = X86::VMOVHLPSZrr;
2637 WorkingMI = CloneIfNew(
MI);
2643 WorkingMI = CloneIfNew(
MI);
2644 unsigned OpNo =
MI.getDesc().getNumOperands() - 1;
2649 case X86::VPTERNLOGDZrri:
2650 case X86::VPTERNLOGDZrmi:
2651 case X86::VPTERNLOGDZ128rri:
2652 case X86::VPTERNLOGDZ128rmi:
2653 case X86::VPTERNLOGDZ256rri:
2654 case X86::VPTERNLOGDZ256rmi:
2655 case X86::VPTERNLOGQZrri:
2656 case X86::VPTERNLOGQZrmi:
2657 case X86::VPTERNLOGQZ128rri:
2658 case X86::VPTERNLOGQZ128rmi:
2659 case X86::VPTERNLOGQZ256rri:
2660 case X86::VPTERNLOGQZ256rmi:
2661 case X86::VPTERNLOGDZrrik:
2662 case X86::VPTERNLOGDZ128rrik:
2663 case X86::VPTERNLOGDZ256rrik:
2664 case X86::VPTERNLOGQZrrik:
2665 case X86::VPTERNLOGQZ128rrik:
2666 case X86::VPTERNLOGQZ256rrik:
2667 case X86::VPTERNLOGDZrrikz:
2668 case X86::VPTERNLOGDZrmikz:
2669 case X86::VPTERNLOGDZ128rrikz:
2670 case X86::VPTERNLOGDZ128rmikz:
2671 case X86::VPTERNLOGDZ256rrikz:
2672 case X86::VPTERNLOGDZ256rmikz:
2673 case X86::VPTERNLOGQZrrikz:
2674 case X86::VPTERNLOGQZrmikz:
2675 case X86::VPTERNLOGQZ128rrikz:
2676 case X86::VPTERNLOGQZ128rmikz:
2677 case X86::VPTERNLOGQZ256rrikz:
2678 case X86::VPTERNLOGQZ256rmikz:
2679 case X86::VPTERNLOGDZ128rmbi:
2680 case X86::VPTERNLOGDZ256rmbi:
2681 case X86::VPTERNLOGDZrmbi:
2682 case X86::VPTERNLOGQZ128rmbi:
2683 case X86::VPTERNLOGQZ256rmbi:
2684 case X86::VPTERNLOGQZrmbi:
2685 case X86::VPTERNLOGDZ128rmbikz:
2686 case X86::VPTERNLOGDZ256rmbikz:
2687 case X86::VPTERNLOGDZrmbikz:
2688 case X86::VPTERNLOGQZ128rmbikz:
2689 case X86::VPTERNLOGQZ256rmbikz:
2690 case X86::VPTERNLOGQZrmbikz: {
2691 WorkingMI = CloneIfNew(
MI);
2697 WorkingMI = CloneIfNew(
MI);
2703 WorkingMI = CloneIfNew(
MI);
2712bool X86InstrInfo::findThreeSrcCommutedOpIndices(
const MachineInstr &
MI,
2713 unsigned &SrcOpIdx1,
2714 unsigned &SrcOpIdx2,
2715 bool IsIntrinsic)
const {
2718 unsigned FirstCommutableVecOp = 1;
2719 unsigned LastCommutableVecOp = 3;
2720 unsigned KMaskOp = -1U;
2743 FirstCommutableVecOp = 3;
2745 LastCommutableVecOp++;
2746 }
else if (IsIntrinsic) {
2749 FirstCommutableVecOp = 2;
2752 if (
isMem(
MI, LastCommutableVecOp))
2753 LastCommutableVecOp--;
2758 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2759 (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
2760 SrcOpIdx1 == KMaskOp))
2762 if (SrcOpIdx2 != CommuteAnyOperandIndex &&
2763 (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
2764 SrcOpIdx2 == KMaskOp))
2769 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2770 SrcOpIdx2 == CommuteAnyOperandIndex) {
2771 unsigned CommutableOpIdx2 = SrcOpIdx2;
2775 if (SrcOpIdx1 == SrcOpIdx2)
2778 CommutableOpIdx2 = LastCommutableVecOp;
2779 else if (SrcOpIdx2 == CommuteAnyOperandIndex)
2781 CommutableOpIdx2 = SrcOpIdx1;
2785 Register Op2Reg =
MI.getOperand(CommutableOpIdx2).getReg();
2787 unsigned CommutableOpIdx1;
2788 for (CommutableOpIdx1 = LastCommutableVecOp;
2789 CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
2791 if (CommutableOpIdx1 == KMaskOp)
2797 if (Op2Reg !=
MI.getOperand(CommutableOpIdx1).getReg())
2802 if (CommutableOpIdx1 < FirstCommutableVecOp)
2807 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2816 unsigned &SrcOpIdx1,
2817 unsigned &SrcOpIdx2)
const {
2819 if (!
Desc.isCommutable())
2822 switch (
MI.getOpcode()) {
2827 case X86::VCMPSDrri:
2828 case X86::VCMPSSrri:
2829 case X86::VCMPPDrri:
2830 case X86::VCMPPSrri:
2831 case X86::VCMPPDYrri:
2832 case X86::VCMPPSYrri:
2833 case X86::VCMPSDZrri:
2834 case X86::VCMPSSZrri:
2835 case X86::VCMPPDZrri:
2836 case X86::VCMPPSZrri:
2837 case X86::VCMPSHZrri:
2838 case X86::VCMPPHZrri:
2839 case X86::VCMPPHZ128rri:
2840 case X86::VCMPPHZ256rri:
2841 case X86::VCMPPDZ128rri:
2842 case X86::VCMPPSZ128rri:
2843 case X86::VCMPPDZ256rri:
2844 case X86::VCMPPSZ256rri:
2845 case X86::VCMPPDZrrik:
2846 case X86::VCMPPSZrrik:
2847 case X86::VCMPPDZ128rrik:
2848 case X86::VCMPPSZ128rrik:
2849 case X86::VCMPPDZ256rrik:
2850 case X86::VCMPPSZ256rrik: {
2855 unsigned Imm =
MI.getOperand(3 + OpOffset).getImm() & 0x7;
2872 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
2882 case X86::SHUFPDrri:
2884 if (
MI.getOperand(3).getImm() == 0x02)
2887 case X86::MOVHLPSrr:
2888 case X86::UNPCKHPDrr:
2889 case X86::VMOVHLPSrr:
2890 case X86::VUNPCKHPDrr:
2891 case X86::VMOVHLPSZrr:
2892 case X86::VUNPCKHPDZ128rr:
2896 case X86::VPTERNLOGDZrri:
2897 case X86::VPTERNLOGDZrmi:
2898 case X86::VPTERNLOGDZ128rri:
2899 case X86::VPTERNLOGDZ128rmi:
2900 case X86::VPTERNLOGDZ256rri:
2901 case X86::VPTERNLOGDZ256rmi:
2902 case X86::VPTERNLOGQZrri:
2903 case X86::VPTERNLOGQZrmi:
2904 case X86::VPTERNLOGQZ128rri:
2905 case X86::VPTERNLOGQZ128rmi:
2906 case X86::VPTERNLOGQZ256rri:
2907 case X86::VPTERNLOGQZ256rmi:
2908 case X86::VPTERNLOGDZrrik:
2909 case X86::VPTERNLOGDZ128rrik:
2910 case X86::VPTERNLOGDZ256rrik:
2911 case X86::VPTERNLOGQZrrik:
2912 case X86::VPTERNLOGQZ128rrik:
2913 case X86::VPTERNLOGQZ256rrik:
2914 case X86::VPTERNLOGDZrrikz:
2915 case X86::VPTERNLOGDZrmikz:
2916 case X86::VPTERNLOGDZ128rrikz:
2917 case X86::VPTERNLOGDZ128rmikz:
2918 case X86::VPTERNLOGDZ256rrikz:
2919 case X86::VPTERNLOGDZ256rmikz:
2920 case X86::VPTERNLOGQZrrikz:
2921 case X86::VPTERNLOGQZrmikz:
2922 case X86::VPTERNLOGQZ128rrikz:
2923 case X86::VPTERNLOGQZ128rmikz:
2924 case X86::VPTERNLOGQZ256rrikz:
2925 case X86::VPTERNLOGQZ256rmikz:
2926 case X86::VPTERNLOGDZ128rmbi:
2927 case X86::VPTERNLOGDZ256rmbi:
2928 case X86::VPTERNLOGDZrmbi:
2929 case X86::VPTERNLOGQZ128rmbi:
2930 case X86::VPTERNLOGQZ256rmbi:
2931 case X86::VPTERNLOGQZrmbi:
2932 case X86::VPTERNLOGDZ128rmbikz:
2933 case X86::VPTERNLOGDZ256rmbikz:
2934 case X86::VPTERNLOGDZrmbikz:
2935 case X86::VPTERNLOGQZ128rmbikz:
2936 case X86::VPTERNLOGQZ256rmbikz:
2937 case X86::VPTERNLOGQZrmbikz:
2938 return findThreeSrcCommutedOpIndices(
MI, SrcOpIdx1, SrcOpIdx2);
2939 case X86::VPDPWSSDYrr:
2940 case X86::VPDPWSSDrr:
2941 case X86::VPDPWSSDSYrr:
2942 case X86::VPDPWSSDSrr:
2943 case X86::VPDPWUUDrr:
2944 case X86::VPDPWUUDYrr:
2945 case X86::VPDPWUUDSrr:
2946 case X86::VPDPWUUDSYrr:
2947 case X86::VPDPBSSDSrr:
2948 case X86::VPDPBSSDSYrr:
2949 case X86::VPDPBSSDrr:
2950 case X86::VPDPBSSDYrr:
2951 case X86::VPDPBUUDSrr:
2952 case X86::VPDPBUUDSYrr:
2953 case X86::VPDPBUUDrr:
2954 case X86::VPDPBUUDYrr:
2955 case X86::VPDPWSSDZ128r:
2956 case X86::VPDPWSSDZ128rk:
2957 case X86::VPDPWSSDZ128rkz:
2958 case X86::VPDPWSSDZ256r:
2959 case X86::VPDPWSSDZ256rk:
2960 case X86::VPDPWSSDZ256rkz:
2961 case X86::VPDPWSSDZr:
2962 case X86::VPDPWSSDZrk:
2963 case X86::VPDPWSSDZrkz:
2964 case X86::VPDPWSSDSZ128r:
2965 case X86::VPDPWSSDSZ128rk:
2966 case X86::VPDPWSSDSZ128rkz:
2967 case X86::VPDPWSSDSZ256r:
2968 case X86::VPDPWSSDSZ256rk:
2969 case X86::VPDPWSSDSZ256rkz:
2970 case X86::VPDPWSSDSZr:
2971 case X86::VPDPWSSDSZrk:
2972 case X86::VPDPWSSDSZrkz:
2973 case X86::VPMADD52HUQrr:
2974 case X86::VPMADD52HUQYrr:
2975 case X86::VPMADD52HUQZ128r:
2976 case X86::VPMADD52HUQZ128rk:
2977 case X86::VPMADD52HUQZ128rkz:
2978 case X86::VPMADD52HUQZ256r:
2979 case X86::VPMADD52HUQZ256rk:
2980 case X86::VPMADD52HUQZ256rkz:
2981 case X86::VPMADD52HUQZr:
2982 case X86::VPMADD52HUQZrk:
2983 case X86::VPMADD52HUQZrkz:
2984 case X86::VPMADD52LUQrr:
2985 case X86::VPMADD52LUQYrr:
2986 case X86::VPMADD52LUQZ128r:
2987 case X86::VPMADD52LUQZ128rk:
2988 case X86::VPMADD52LUQZ128rkz:
2989 case X86::VPMADD52LUQZ256r:
2990 case X86::VPMADD52LUQZ256rk:
2991 case X86::VPMADD52LUQZ256rkz:
2992 case X86::VPMADD52LUQZr:
2993 case X86::VPMADD52LUQZrk:
2994 case X86::VPMADD52LUQZrkz:
2995 case X86::VFMADDCPHZr:
2996 case X86::VFMADDCPHZrk:
2997 case X86::VFMADDCPHZrkz:
2998 case X86::VFMADDCPHZ128r:
2999 case X86::VFMADDCPHZ128rk:
3000 case X86::VFMADDCPHZ128rkz:
3001 case X86::VFMADDCPHZ256r:
3002 case X86::VFMADDCPHZ256rk:
3003 case X86::VFMADDCPHZ256rkz:
3004 case X86::VFMADDCSHZr:
3005 case X86::VFMADDCSHZrk:
3006 case X86::VFMADDCSHZrkz: {
3007 unsigned CommutableOpIdx1 = 2;
3008 unsigned CommutableOpIdx2 = 3;
3014 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3017 if (!
MI.getOperand(SrcOpIdx1).isReg() || !
MI.getOperand(SrcOpIdx2).isReg())
3027 return findThreeSrcCommutedOpIndices(
MI, SrcOpIdx1, SrcOpIdx2,
3034 unsigned CommutableOpIdx1 =
Desc.getNumDefs() + 1;
3035 unsigned CommutableOpIdx2 =
Desc.getNumDefs() + 2;
3038 if ((
MI.getDesc().getOperandConstraint(
Desc.getNumDefs(),
3053 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3057 if (!
MI.getOperand(SrcOpIdx1).isReg() ||
3058 !
MI.getOperand(SrcOpIdx2).isReg())
3070 unsigned Opcode =
MI->getOpcode();
3071 if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
3072 Opcode != X86::LEA64_32r)
3094 unsigned Opcode =
MI.getOpcode();
3095 if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
3123 if (!(X86::isJCC(Opcode) || X86::isSETCC(Opcode) || X86::isCMOVCC(Opcode) ||
3124 X86::isCFCMOVCC(Opcode)))
3234std::pair<X86::CondCode, bool>
3237 bool NeedSwap =
false;
3238 switch (Predicate) {
3317 return std::make_pair(
CC, NeedSwap);
3326#define GET_ND_IF_ENABLED(OPC) (HasNDD ? OPC##_ND : OPC)
3420 switch (Imm & 0x3) {
3438 if (
Info.RegClass == X86::VR128RegClassID ||
3439 Info.RegClass == X86::VR128XRegClassID)
3441 if (
Info.RegClass == X86::VR256RegClassID ||
3442 Info.RegClass == X86::VR256XRegClassID)
3444 if (
Info.RegClass == X86::VR512RegClassID)
3451 return (Reg == X86::FPCW || Reg == X86::FPSW ||
3452 (Reg >= X86::ST0 && Reg <= X86::ST7));
3484#ifdef EXPENSIVE_CHECKS
3486 "Got false negative from X86II::getMemoryOperandNo()!");
3494 unsigned NumOps =
Desc.getNumOperands();
3496#ifdef EXPENSIVE_CHECKS
3498 "Expected no operands to have OPERAND_MEMORY type!");
3507 if (IsMemOp(
Desc.operands()[
I])) {
3508#ifdef EXPENSIVE_CHECKS
3512 "Expected all five operands in the memory reference to have "
3513 "OPERAND_MEMORY type!");
3525 "Unexpected number of operands!");
3528 if (!
Index.isReg() ||
Index.getReg() != X86::NoRegister)
3536 MI.getParent()->getParent()->getConstantPool()->getConstants();
3548 switch (
MI.getOpcode()) {
3549 case X86::TCRETURNdi:
3550 case X86::TCRETURNri:
3551 case X86::TCRETURNmi:
3552 case X86::TCRETURNdi64:
3553 case X86::TCRETURNri64:
3554 case X86::TCRETURNmi64:
3573 if (Symbol.equals(
"__x86_indirect_thunk_r11"))
3578 if (TailCall.getOpcode() != X86::TCRETURNdi &&
3579 TailCall.getOpcode() != X86::TCRETURNdi64) {
3597 TailCall.getOperand(1).getImm() != 0) {
3613 if (
I->isDebugInstr())
3616 assert(0 &&
"Can't find the branch to replace!");
3620 if (
CC != BranchCond[0].getImm())
3626 unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
3627 : X86::TCRETURNdi64cc;
3641 for (
const auto &
C : Clobbers) {
3646 I->eraseFromParent();
3660 if (Succ->isEHPad() || (Succ ==
TBB && FallthroughBB))
3663 if (FallthroughBB && FallthroughBB !=
TBB)
3665 FallthroughBB = Succ;
3667 return FallthroughBB;
3670bool X86InstrInfo::analyzeBranchImpl(
3681 if (
I->isDebugInstr())
3686 if (!isUnpredicatedTerminator(*
I))
3695 if (
I->getOpcode() == X86::JMP_1) {
3699 TBB =
I->getOperand(0).getMBB();
3712 I->eraseFromParent();
3714 UnCondBrIter =
MBB.
end();
3719 TBB =
I->getOperand(0).getMBB();
3730 if (
I->findRegisterUseOperand(X86::EFLAGS)->isUndef())
3736 TBB =
I->getOperand(0).getMBB();
3750 auto NewTBB =
I->getOperand(0).getMBB();
3751 if (OldBranchCode == BranchCode &&
TBB == NewTBB)
3757 if (
TBB == NewTBB &&
3790 Cond[0].setImm(BranchCode);
3801 bool AllowModify)
const {
3803 return analyzeBranchImpl(
MBB,
TBB, FBB,
Cond, CondBranches, AllowModify);
3809 assert(MemRefBegin >= 0 &&
"instr should have memory operand");
3821 if (!Reg.isVirtual())
3826 unsigned Opcode =
MI->getOpcode();
3827 if (Opcode != X86::LEA64r && Opcode != X86::LEA32r)
3833 unsigned Opcode =
MI.getOpcode();
3836 if (Opcode == X86::JMP64m || Opcode == X86::JMP32m) {
3844 if (Opcode == X86::JMP64r || Opcode == X86::JMP32r) {
3846 if (!Reg.isVirtual())
3853 if (
Add->getOpcode() != X86::ADD64rr &&
Add->getOpcode() != X86::ADD32rr)
3866 MachineBranchPredicate &MBP,
3867 bool AllowModify)
const {
3868 using namespace std::placeholders;
3872 if (analyzeBranchImpl(
MBB, MBP.TrueDest, MBP.FalseDest,
Cond, CondBranches,
3876 if (
Cond.size() != 1)
3879 assert(MBP.TrueDest &&
"expected!");
3887 bool SingleUseCondition =
true;
3890 if (
MI.modifiesRegister(X86::EFLAGS,
TRI)) {
3895 if (
MI.readsRegister(X86::EFLAGS,
TRI))
3896 SingleUseCondition =
false;
3902 if (SingleUseCondition) {
3904 if (Succ->isLiveIn(X86::EFLAGS))
3905 SingleUseCondition =
false;
3908 MBP.ConditionDef = ConditionDef;
3909 MBP.SingleUseCondition = SingleUseCondition;
3916 const unsigned TestOpcode =
3917 Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
3919 if (ConditionDef->
getOpcode() == TestOpcode &&
3926 ? MachineBranchPredicate::PRED_NE
3927 : MachineBranchPredicate::PRED_EQ;
3935 int *BytesRemoved)
const {
3936 assert(!BytesRemoved &&
"code size not handled");
3943 if (
I->isDebugInstr())
3945 if (
I->getOpcode() != X86::JMP_1 &&
3949 I->eraseFromParent();
3963 assert(
TBB &&
"insertBranch must not be told to insert a fallthrough");
3965 "X86 branch conditions have one component!");
3966 assert(!BytesAdded &&
"code size not handled");
3970 assert(!FBB &&
"Unconditional branch with multiple successors!");
3976 bool FallThru = FBB ==
nullptr;
3991 if (FBB ==
nullptr) {
3993 assert(FBB &&
"MBB cannot be the last block in function when the false "
3994 "body is a fall-through.");
4018 Register FalseReg,
int &CondCycles,
4019 int &TrueCycles,
int &FalseCycles)
const {
4023 if (
Cond.size() != 1)
4032 RI.getCommonSubClass(
MRI.getRegClass(TrueReg),
MRI.getRegClass(FalseReg));
4037 if (X86::GR16RegClass.hasSubClassEq(RC) ||
4038 X86::GR32RegClass.hasSubClassEq(RC) ||
4039 X86::GR64RegClass.hasSubClassEq(RC)) {
4060 assert(
Cond.size() == 1 &&
"Invalid Cond array");
4063 false , Subtarget.hasNDD());
4072 return X86::GR8_ABCD_HRegClass.contains(Reg);
4078 bool HasAVX = Subtarget.
hasAVX();
4080 bool HasEGPR = Subtarget.hasEGPR();
4087 if (X86::VK16RegClass.
contains(SrcReg)) {
4088 if (X86::GR64RegClass.
contains(DestReg)) {
4089 assert(Subtarget.hasBWI());
4090 return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
4092 if (X86::GR32RegClass.
contains(DestReg))
4093 return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
4094 : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
4102 if (X86::VK16RegClass.
contains(DestReg)) {
4103 if (X86::GR64RegClass.
contains(SrcReg)) {
4104 assert(Subtarget.hasBWI());
4105 return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
4107 if (X86::GR32RegClass.
contains(SrcReg))
4108 return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
4109 : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
4117 if (X86::GR64RegClass.
contains(DestReg)) {
4118 if (X86::VR128XRegClass.
contains(SrcReg))
4120 return HasAVX512 ? X86::VMOVPQIto64Zrr
4121 : HasAVX ? X86::VMOVPQIto64rr
4122 : X86::MOVPQIto64rr;
4123 if (X86::VR64RegClass.
contains(SrcReg))
4125 return X86::MMX_MOVD64from64rr;
4126 }
else if (X86::GR64RegClass.
contains(SrcReg)) {
4128 if (X86::VR128XRegClass.
contains(DestReg))
4129 return HasAVX512 ? X86::VMOV64toPQIZrr
4130 : HasAVX ? X86::VMOV64toPQIrr
4131 : X86::MOV64toPQIrr;
4133 if (X86::VR64RegClass.
contains(DestReg))
4134 return X86::MMX_MOVD64to64rr;
4140 if (X86::GR32RegClass.
contains(DestReg) &&
4141 X86::VR128XRegClass.contains(SrcReg))
4143 return HasAVX512 ? X86::VMOVPDI2DIZrr
4144 : HasAVX ? X86::VMOVPDI2DIrr
4147 if (X86::VR128XRegClass.
contains(DestReg) &&
4148 X86::GR32RegClass.contains(SrcReg))
4150 return HasAVX512 ? X86::VMOVDI2PDIZrr
4151 : HasAVX ? X86::VMOVDI2PDIrr
4161 bool HasAVX = Subtarget.
hasAVX();
4162 bool HasVLX = Subtarget.hasVLX();
4163 bool HasEGPR = Subtarget.hasEGPR();
4165 if (X86::GR64RegClass.
contains(DestReg, SrcReg))
4167 else if (X86::GR32RegClass.
contains(DestReg, SrcReg))
4169 else if (X86::GR16RegClass.
contains(DestReg, SrcReg))
4171 else if (X86::GR8RegClass.
contains(DestReg, SrcReg)) {
4174 if ((
isHReg(DestReg) ||
isHReg(SrcReg)) && Subtarget.is64Bit()) {
4175 Opc = X86::MOV8rr_NOREX;
4178 "8-bit H register can not be copied outside GR8_NOREX");
4181 }
else if (X86::VR64RegClass.
contains(DestReg, SrcReg))
4182 Opc = X86::MMX_MOVQ64rr;
4183 else if (X86::VR128XRegClass.
contains(DestReg, SrcReg)) {
4185 Opc = X86::VMOVAPSZ128rr;
4186 else if (X86::VR128RegClass.
contains(DestReg, SrcReg))
4187 Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
4191 Opc = X86::VMOVAPSZrr;
4194 TRI->getMatchingSuperReg(DestReg, X86::sub_xmm, &X86::VR512RegClass);
4196 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
4198 }
else if (X86::VR256XRegClass.
contains(DestReg, SrcReg)) {
4200 Opc = X86::VMOVAPSZ256rr;
4201 else if (X86::VR256RegClass.
contains(DestReg, SrcReg))
4202 Opc = X86::VMOVAPSYrr;
4206 Opc = X86::VMOVAPSZrr;
4209 TRI->getMatchingSuperReg(DestReg, X86::sub_ymm, &X86::VR512RegClass);
4211 TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
4213 }
else if (X86::VR512RegClass.
contains(DestReg, SrcReg))
4214 Opc = X86::VMOVAPSZrr;
4217 else if (X86::VK16RegClass.
contains(DestReg, SrcReg))
4218 Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
4219 : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
4229 if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
4237 LLVM_DEBUG(
dbgs() <<
"Cannot copy " << RI.getName(SrcReg) <<
" to "
4238 << RI.getName(DestReg) <<
'\n');
4242std::optional<DestSourcePair>
4244 if (
MI.isMoveReg()) {
4248 if (
MI.getOperand(0).isUndef() &&
MI.getOperand(0).getSubReg())
4249 return std::nullopt;
4253 return std::nullopt;
4258 return Load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
4261 : STI.
hasAVX() ? X86::VMOVSSrm
4265 : STI.
hasAVX() ? X86::VMOVSSmr
static unsigned getLoadStoreRegOpcode(Register Reg,
                                      const TargetRegisterClass *RC,
                                      bool IsStackAligned,
                                      const X86Subtarget &STI, bool Load) {
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();
  bool HasEGPR = STI.hasEGPR();

  assert(RC != nullptr && "Invalid target register class");
  switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
  default:
    llvm_unreachable("Unknown spill size");
  case 1:
    assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
    if (STI.is64Bit())
      // Copying to or from a physical H register on x86-64 requires a NOREX
      // move. Otherwise use a normal move.
      if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
        return Load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
    return Load ? X86::MOV8rm : X86::MOV8mr;
  case 2:
    if (X86::VK16RegClass.hasSubClassEq(RC))
      return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
                  : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
    assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
    return Load ? X86::MOV16rm : X86::MOV16mr;
  case 4:
    if (X86::GR32RegClass.hasSubClassEq(RC))
      return Load ? X86::MOV32rm : X86::MOV32mr;
    if (X86::FR32XRegClass.hasSubClassEq(RC))
      return Load ? (HasAVX512 ? X86::VMOVSSZrm_alt
                     : HasAVX  ? X86::VMOVSSrm_alt
                               : X86::MOVSSrm_alt)
                  : (HasAVX512 ? X86::VMOVSSZmr
                     : HasAVX  ? X86::VMOVSSmr
                               : X86::MOVSSmr);
    if (X86::RFP32RegClass.hasSubClassEq(RC))
      return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
    if (X86::VK32RegClass.hasSubClassEq(RC)) {
      assert(STI.hasBWI() && "KMOVD requires BWI");
      return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
                  : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
    }
    // All of these mask pair classes have the same spill size, so the same
    // pair load/store instructions can be used with all of them.
    if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK16PAIRRegClass.hasSubClassEq(RC))
      return Load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
    if (X86::FR16RegClass.hasSubClassEq(RC) ||
        X86::FR16XRegClass.hasSubClassEq(RC))
      return getLoadStoreOpcodeForFP16(Load, STI);
    llvm_unreachable("Unknown 4-byte regclass");
  case 8:
    if (X86::GR64RegClass.hasSubClassEq(RC))
      return Load ? X86::MOV64rm : X86::MOV64mr;
    if (X86::FR64XRegClass.hasSubClassEq(RC))
      return Load ? (HasAVX512 ? X86::VMOVSDZrm_alt
                     : HasAVX  ? X86::VMOVSDrm_alt
                               : X86::MOVSDrm_alt)
                  : (HasAVX512 ? X86::VMOVSDZmr
                     : HasAVX  ? X86::VMOVSDmr
                               : X86::MOVSDmr);
    if (X86::VR64RegClass.hasSubClassEq(RC))
      return Load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
    if (X86::RFP64RegClass.hasSubClassEq(RC))
      return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
    if (X86::VK64RegClass.hasSubClassEq(RC)) {
      assert(STI.hasBWI() && "KMOVQ requires BWI");
      return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
                  : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
    }
    llvm_unreachable("Unknown 8-byte regclass");
  case 10:
    assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
    return Load ? X86::LD_Fp80m : X86::ST_FpP80m;
  case 16:
    if (X86::VR128XRegClass.hasSubClassEq(RC)) {
      // If the stack is realigned we can use aligned stores.
      if (IsStackAligned)
        return Load ? (HasVLX      ? X86::VMOVAPSZ128rm
                       : HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX
                       : HasAVX    ? X86::VMOVAPSrm
                                   : X86::MOVAPSrm)
                    : (HasVLX      ? X86::VMOVAPSZ128mr
                       : HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX
                       : HasAVX    ? X86::VMOVAPSmr
                                   : X86::MOVAPSmr);
      else
        return Load ? (HasVLX      ? X86::VMOVUPSZ128rm
                       : HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX
                       : HasAVX    ? X86::VMOVUPSrm
                                   : X86::MOVUPSrm)
                    : (HasVLX      ? X86::VMOVUPSZ128mr
                       : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX
                       : HasAVX    ? X86::VMOVUPSmr
                                   : X86::MOVUPSmr);
    }
    llvm_unreachable("Unknown 16-byte regclass");
  case 32:
    assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
    // If the stack is realigned we can use aligned stores.
    if (IsStackAligned)
      return Load ? (HasVLX      ? X86::VMOVAPSZ256rm
                     : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                 : X86::VMOVAPSYrm)
                  : (HasVLX      ? X86::VMOVAPSZ256mr
                     : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                 : X86::VMOVAPSYmr);
    else
      return Load ? (HasVLX      ? X86::VMOVUPSZ256rm
                     : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                 : X86::VMOVUPSYrm)
                  : (HasVLX      ? X86::VMOVUPSZ256mr
                     : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                 : X86::VMOVUPSYmr);
  case 64:
    assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
    assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
    if (IsStackAligned)
      return Load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    return Load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  case 1024:
    assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
    assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
#define GET_EGPR_IF_ENABLED(OPC) (STI.hasEGPR() ? OPC##_EVEX : OPC)
    return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
                : GET_EGPR_IF_ENABLED(X86::TILESTORED);
#undef GET_EGPR_IF_ENABLED
  }
}
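// Decompose a load/store's address operands into an ExtAddrMode (base
// register, scaled index, displacement) for the generic memory optimizers.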
std::optional<ExtAddrMode>
X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
                                      const TargetRegisterInfo *TRI) const {
  const MCInstrDesc &Desc = MemI.getDesc();
  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (MemRefBegin < 0)
    return std::nullopt;
  MemRefBegin += X86II::getOperandBias(Desc);

  const MachineOperand &BaseOp =
      MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
  if (!BaseOp.isReg()) // Can be an MO_FrameIndex.
    return std::nullopt;

  const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
  if (!DispMO.isImm()) // The displacement can be symbolic.
    return std::nullopt;

  ExtAddrMode AM;
  AM.BaseReg = BaseOp.getReg();
  AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg();
  AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
  AM.Displacement = DispMO.getImm();
  return AM;
}
      ErrInfo = "Scale factor in address must be 1, 2, 4 or 8";
      return false;
    }
    if (!isInt<32>(DispMO.getImm())) {
      ErrInfo = "Displacement in address must fit into 32-bit signed "
                "integer";
      return false;
    }
bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
                                           const Register Reg,
                                           int64_t &ImmVal) const {
  Register MovReg = Reg;
  const MachineInstr *MovMI = &MI;

  // Follow use-def for SUBREG_TO_REG to find the real move-immediate
  // instruction. This pattern is quite common for x86-64 zero extensions.
  if (MI.isSubregToReg()) {
    if (!MI.getOperand(1).isImm())
      return false;
    unsigned FillBits = MI.getOperand(1).getImm();
    unsigned SubIdx = MI.getOperand(3).getImm();
    MovReg = MI.getOperand(2).getReg();
    if (SubIdx != X86::sub_32bit || FillBits != 0)
      return false;
    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    MovMI = MRI.getUniqueVRegDef(MovReg);
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() == X86::MOV32r0 &&
      MovMI->getOperand(0).getReg() == MovReg) {
    ImmVal = 0;
    return true;
  }

  if (MovMI->getOpcode() != X86::MOV32ri &&
      MovMI->getOpcode() != X86::MOV64ri &&
      MovMI->getOpcode() != X86::MOV32ri64 && MovMI->getOpcode() != X86::MOV8ri)
    return false;
  // The source of the move can also be a global address.
  if (!MovMI->getOperand(1).isImm() || MovMI->getOperand(0).getReg() != MovReg)
    return false;
  ImmVal = MovMI->getOperand(1).getImm();
  return true;
}
bool X86InstrInfo::preservesZeroValueInReg(
    const MachineInstr *MI, const Register NullValueReg,
    const TargetRegisterInfo *TRI) const {
  if (!MI->modifiesRegister(NullValueReg, TRI))
    return true;
  switch (MI->getOpcode()) {
  // Shift right/left of a null into itself is still a null, e.g.
  // rax = shl rax, X.
  case X86::SHR64ri:
  case X86::SHR32ri:
  case X86::SHL64ri:
  case X86::SHL32ri:
    assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() &&
           "expected for shift opcode!");
    return MI->getOperand(0).getReg() == NullValueReg &&
           MI->getOperand(1).getReg() == NullValueReg;
  // Zero extend of a sub-reg of NullValueReg into itself does not change the
  // null value.
  case X86::MOV32rr:
    return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) {
      return TRI->isSubRegisterEq(NullValueReg, MO.getReg());
    });
  default:
    return false;
  }
}
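// Report the base operand, constant displacement, and access width of a
// simple load/store so the scheduler can cluster nearby memory accesses.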
bool X86InstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  const MCInstrDesc &Desc = MemOp.getDesc();
  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (MemRefBegin < 0)
    return false;
  MemRefBegin += X86II::getOperandBias(Desc);

  const MachineOperand *BaseOp =
      &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
  if (!BaseOp->isReg()) // Can be an MO_FrameIndex.
    return false;

  const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
  if (!DispMO.isImm()) // The displacement can be symbolic.
    return false;
  Offset = DispMO.getImm();

  if (!BaseOp->isReg())
    return false;

  OffsetIsScalable = false;
  // FIXME: Relying on memoperands() may not be the right thing to do here;
  // without one the width is simply reported as unknown (0).
  Width =
      !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
  BaseOps.push_back(BaseOp);
  return true;
}
static unsigned getStoreRegOpcode(Register SrcReg,
                                  const TargetRegisterClass *RC,
                                  bool IsStackAligned,
                                  const X86Subtarget &STI) {
  return getLoadStoreRegOpcode(SrcReg, RC, IsStackAligned, STI, false);
}

static unsigned getLoadRegOpcode(Register DestReg,
                                 const TargetRegisterClass *RC,
                                 bool IsStackAligned, const X86Subtarget &STI) {
  return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
}

static bool isAMXOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case X86::TILELOADD:
  case X86::TILESTORED:
  case X86::TILELOADD_EVEX:
  case X86::TILESTORED_EVEX:
    return true;
  }
}

void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned Opc, Register Reg, int FrameIdx,
                                    bool isKill) const {
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected special opcode!");
  case X86::TILESTORED:
  case X86::TILESTORED_EVEX: {
    // tilestored %tmm, (%sp, %idx): materialize the stride (64) in a virtual
    // register and use it as the index of the memory operand.
    MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
    BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
    MachineInstr *NewMI =
        addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
            .addReg(Reg, getKillRegState(isKill));
    MachineOperand &MO = NewMI->getOperand(X86::AddrIndexReg);
    MO.setReg(VirtReg);
    MO.setIsKill(true);
    break;
  }
  case X86::TILELOADD:
  case X86::TILELOADD_EVEX: {
    // tileloadd (%sp, %idx), %tmm: same trick for the reload.
    MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
    BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
    MachineInstr *NewMI = addFrameReference(
        BuildMI(MBB, MI, DebugLoc(), get(Opc), Reg), FrameIdx);
    MachineOperand &MO = NewMI->getOperand(1 + X86::AddrIndexReg);
    MO.setReg(VirtReg);
    MO.setIsKill(true);
    break;
  }
  }
}
4645 "Stack slot too small for store");
4647 unsigned Alignment = std::max<uint32_t>(
TRI->getSpillSize(*RC), 16);
4669 "Load size exceeds stack slot");
4670 unsigned Alignment = std::max<uint32_t>(
TRI->getSpillSize(*RC), 16);
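// analyzeCompare decomposes CMP/SUB/TEST instructions into a source register
// pair plus an immediate mask/value. optimizeCompareInstr uses this
// decomposition to prove a later compare redundant.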
bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                  Register &SrcReg2, int64_t &CmpMask,
                                  int64_t &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case X86::CMP64ri32:
  case X86::CMP32ri:
  case X86::CMP16ri:
  case X86::CMP8ri:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    if (MI.getOperand(1).isImm()) {
      CmpMask = ~0;
      CmpValue = MI.getOperand(1).getImm();
    } else {
      CmpMask = CmpValue = 0;
    }
    return true;
  // A SUB against memory still sets EFLAGS like a compare; only the register
  // source can be reported.
  CASE_ND(SUB64rm)
  CASE_ND(SUB32rm)
  CASE_ND(SUB16rm)
  CASE_ND(SUB8rm)
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = 0;
    CmpValue = 0;
    return true;
  CASE_ND(SUB64rr)
  CASE_ND(SUB32rr)
  CASE_ND(SUB16rr)
  CASE_ND(SUB8rr)
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = 0;
    CmpValue = 0;
    return true;
  CASE_ND(SUB64ri32)
  CASE_ND(SUB32ri)
  CASE_ND(SUB16ri)
  CASE_ND(SUB8ri)
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    if (MI.getOperand(2).isImm()) {
      CmpMask = ~0;
      CmpValue = MI.getOperand(2).getImm();
    } else {
      CmpMask = CmpValue = 0;
    }
    return true;
  case X86::CMP64rr:
  case X86::CMP32rr:
  case X86::CMP16rr:
  case X86::CMP8rr:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    CmpMask = 0;
    CmpValue = 0;
    return true;
  case X86::TEST8rr:
  case X86::TEST16rr:
  case X86::TEST32rr:
  case X86::TEST64rr:
    SrcReg = MI.getOperand(0).getReg();
    if (MI.getOperand(1).getReg() != SrcReg)
      return false;
    // This is a comparison against zero.
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  }
  return false;
}
bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
                                        Register SrcReg, Register SrcReg2,
                                        int64_t ImmMask, int64_t ImmValue,
                                        const MachineInstr &OI, bool *IsSwapped,
                                        int64_t *ImmDelta) const {
  switch (OI.getOpcode()) {
  case X86::CMP64rr:
  case X86::CMP32rr:
  case X86::CMP16rr:
  case X86::CMP8rr:
  CASE_ND(SUB64rr)
  CASE_ND(SUB32rr)
  CASE_ND(SUB16rr)
  CASE_ND(SUB8rr) {
    // The other instruction must compare the same values (possibly swapped).
    Register OISrcReg;
    Register OISrcReg2;
    int64_t OIMask;
    int64_t OIValue;
    if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
        OIMask != ImmMask || OIValue != ImmValue)
      return false;
    if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
      *IsSwapped = false;
      return true;
    }
    if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
      *IsSwapped = true;
      return true;
    }
    return false;
  }
  case X86::CMP64ri32:
  case X86::CMP32ri:
  case X86::CMP16ri:
  case X86::CMP8ri:
  CASE_ND(SUB64ri32)
  CASE_ND(SUB32ri)
  CASE_ND(SUB16ri)
  CASE_ND(SUB8ri)
  case X86::TEST64rr:
  case X86::TEST32rr:
  case X86::TEST16rr:
  case X86::TEST8rr: {
    if (ImmMask != 0) {
      Register OISrcReg;
      Register OISrcReg2;
      int64_t OIMask;
      int64_t OIValue;
      if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
          SrcReg == OISrcReg && ImmMask == OIMask) {
        if (OIValue == ImmValue) {
          *ImmDelta = 0;
          return true;
        } else if (static_cast<uint64_t>(ImmValue) ==
                   static_cast<uint64_t>(OIValue) - 1) {
          *ImmDelta = -1;
          return true;
        } else if (static_cast<uint64_t>(ImmValue) ==
                   static_cast<uint64_t>(OIValue) + 1) {
          *ImmDelta = 1;
          return true;
        }
      }
    }
    return FlagI.isIdenticalTo(OI);
  }
  default:
    return false;
  }
}
/// Check whether the definition can be converted to remove a comparison
/// against zero.
inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
                                    bool &ClearsOverflowFlag) {
  NoSignFlag = false;
  ClearsOverflowFlag = false;

  // "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF,
  // and i386 GOTNTPOFF/INDNTPOFF relocations can convert an ADD to a LEA,
  // which means that the compare cannot be optimized away.
  if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
    unsigned Flags = MI.getOperand(5).getTargetFlags();
    if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
        Flags == X86II::MO_GOTNTPOFF)
      return false;
  }

  switch (MI.getOpcode()) {
  default:
    return false;
  case X86::LZCNT16rr:
  case X86::LZCNT16rm:
  case X86::LZCNT32rr:
  case X86::LZCNT32rm:
  case X86::LZCNT64rr:
  case X86::LZCNT64rm:
  case X86::POPCNT16rr:
  case X86::POPCNT16rm:
  case X86::POPCNT32rr:
  case X86::POPCNT32rm:
  case X86::POPCNT64rr:
  case X86::POPCNT64rm:
  case X86::TZCNT16rr:
  case X86::TZCNT16rm:
  case X86::TZCNT32rr:
  case X86::TZCNT32rm:
  case X86::TZCNT64rr:
  case X86::TZCNT64rm:
    return true;

  case X86::BLSMSK32rr:
  case X86::BLSMSK32rm:
  case X86::BLSMSK64rr:
  case X86::BLSMSK64rm:

  case X86::BLCFILL32rr:
  case X86::BLCFILL32rm:
  case X86::BLCFILL64rr:
  case X86::BLCFILL64rm:

  case X86::BLCIC32rr:
  case X86::BLCIC32rm:
  case X86::BLCIC64rr:
  case X86::BLCIC64rm:
  case X86::BLCMSK32rr:
  case X86::BLCMSK32rm:
  case X86::BLCMSK64rr:
  case X86::BLCMSK64rm:

  case X86::BLSFILL32rr:
  case X86::BLSFILL32rm:
  case X86::BLSFILL64rr:
  case X86::BLSFILL64rm:
  case X86::BLSIC32rr:
  case X86::BLSIC32rm:
  case X86::BLSIC64rr:
  case X86::BLSIC64rm:

  case X86::T1MSKC32rr:
  case X86::T1MSKC32rm:
  case X86::T1MSKC64rr:
  case X86::T1MSKC64rm:
  case X86::TZMSK32rr:
  case X86::TZMSK32rm:
  case X86::TZMSK64rr:
  case X86::TZMSK64rm:
    ClearsOverflowFlag = true;
    return true;
  case X86::BEXTR32rr:
  case X86::BEXTR64rr:
  case X86::BEXTR32rm:
  case X86::BEXTR64rm:
  case X86::BEXTRI32ri:
  case X86::BEXTRI32mi:
  case X86::BEXTRI64ri:
  case X86::BEXTRI64mi:
    // BEXTR doesn't update the sign flag, so we can't use it.
    NoSignFlag = true;
    return true;
  }
}
/// Check whether the use can be converted to remove a comparison against
/// zero, returning the condition code that becomes available if so.
static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return X86::COND_INVALID;
  case X86::LZCNT16rr:
  case X86::LZCNT32rr:
  case X86::LZCNT64rr:
    return X86::COND_B;
  case X86::POPCNT16rr:
  case X86::POPCNT32rr:
  case X86::POPCNT64rr:
    return X86::COND_E;
  case X86::TZCNT16rr:
  case X86::TZCNT32rr:
  case X86::TZCNT64rr:
    return X86::COND_B;

  case X86::BLSMSK32rr:
  case X86::BLSMSK64rr:
    return X86::COND_B;
  }
}
  unsigned NewOpcode = 0;
#define FROM_TO(A, B)                                                          \
  CASE_ND(A)                                                                   \
  NewOpcode = X86::B;                                                          \
  break;

  if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
      NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
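// optimizeCompareInstr scans backwards from the compare for an instruction
// that already set EFLAGS appropriately (or for an identical/swapped
// SUB/CMP), rewrites dependent condition codes where needed, and deletes the
// redundant compare.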
  bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);

  MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
  assert(SrcRegDef && "Must have a definition (SSA)");

  bool NoSignFlag = false;
  bool ClearsOverflowFlag = false;
  bool ShouldUpdateCC = false;
  bool IsSwapped = false;
  int64_t ImmDelta = 0;

  // Search backward from CmpInstr for the next instruction defining EFLAGS.
    if (&Inst == SrcRegDef) {
      if (IsCmpZero &&
          isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {

      if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {

          Inst.getOperand(1).getReg() == SrcReg) {
        ShouldUpdateCC = true;

      // Try to use EFLAGS produced by an identical (or swapped) flag
      // instruction instead.
      if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
                               Inst, &IsSwapped, &ImmDelta)) {

      // MOV32r0 is implemented with a xor, which clobbers the condition
      // code. It is safe to move up if its EFLAGS def is dead.
      if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
          Inst.registerDefIsDead(X86::EFLAGS, TRI)) {

  // Scan forward from CmpInstr for uses of EFLAGS. It is safe to remove
  // CmpInstr only if EFLAGS is redefined or killed before any other use.
  bool FlagsMayLiveOut = true;

    bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
    bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
    // EFLAGS is redefined by this instruction, stop scanning.
    if (!UseEFLAGS && ModifyEFLAGS) {
      FlagsMayLiveOut = false;
      break;
    }
    if (!UseEFLAGS && !ModifyEFLAGS)
      continue;

      // The instruction to be updated must clear the overflow flag.
      if (!ClearsOverflowFlag)
        return false;

        ReplacementCC = NewCC;

    } else if (IsSwapped) {
      // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
      // to be swapped as well.
      ShouldUpdateCC = true;
    } else if (ImmDelta != 0) {
      unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));

        if (ImmDelta != 1 || CmpValue == 0)
          return false;

        if (ImmDelta != 1 || CmpValue == 0)
          return false;

      ShouldUpdateCC = true;
    }

    if (ShouldUpdateCC && ReplacementCC != OldCC) {
      // Record the instruction so its condition code can be rewritten once
      // we know the compare is removable.
      OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
    }
    if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
      // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
      FlagsMayLiveOut = false;
      break;
    }

  // If EFLAGS is neither killed nor re-defined, check whether it is
  // live-out. If it is live-out, do not optimize.
  if ((MI != nullptr || ShouldUpdateCC) && FlagsMayLiveOut) {

    assert((MI == nullptr || Sub == nullptr) &&
           "Should not have Sub and MI set");
    Sub = MI != nullptr ? MI : Sub;

    // Move Movr0Inst to an EFLAGS-dead spot before Sub.
    if (&CmpMBB != SubBB)
      return false;
    for (; InsertI != InsertE; ++InsertI) {
      if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
          Instr->modifiesRegister(X86::EFLAGS, TRI)) {

    if (InsertI == InsertE)
      return false;

  // Make sure Sub instruction defines EFLAGS and mark the def live.
  assert(FlagDef && "Unable to locate a def EFLAGS operand");

  // Update the condition code of the instructions in OpsToUpdate.
  for (auto &Op : OpsToUpdate) {
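// optimizeLoadInstr tries to sink a single-use load (FoldAsLoadDefReg) into
// the instruction that consumes it, producing a folded memory form.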
  // Check whether we can move DefMI here.
  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
  assert(DefMI);
  bool SawStore = false;

  // Collect information about virtual register operands of MI.
  SmallVector<unsigned, 1> SrcOperandIds;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg != FoldAsLoadDefReg)
      continue;
    // Do not fold if we have a subreg use or a def.
    if (MO.getSubReg() || MO.isDef())
      return nullptr;
    SrcOperandIds.push_back(i);
  }
  if (SrcOperandIds.empty())
    return nullptr;

  // Check whether we can fold the def into this operand.
  if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
    FoldAsLoadDefReg = 0;
    return FoldMI;
  }
#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
    return X86::TO;                                                            \
  case X86::FROM##_ND:                                                         \
    return X86::TO##_ND;

#undef FROM_TO
#define FROM_TO(FROM, TO)                                                      \
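// foldImmediateImpl rewrites a use of a register known to hold a constant
// (see getConstValDefinedInReg) into an immediate form of the instruction,
// e.g. a COPY becomes MOV32ri and a two-address ALU op gets an ri encoding.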
bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI,
                                     Register Reg, int64_t ImmVal,
                                     MachineRegisterInfo *MRI,
                                     bool MakeChange) const {
  const TargetRegisterClass *RC = nullptr;
  if (Reg.isVirtual())
    RC = MRI->getRegClass(Reg);
  if ((Reg.isPhysical() && X86::GR64RegClass.contains(Reg)) ||
      (Reg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC))) {
    if (!isInt<32>(ImmVal))
      return false;
  }

  if (UseMI.findRegisterUseOperand(Reg)->getSubReg())
    return false;
  // An immediate has a larger code size than a register. So avoid folding
  // the immediate if it has more than one use and we are optimizing for size.
  if (UseMI.getMF()->getFunction().hasOptSize() && Reg.isVirtual() &&
      !MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI.getOpcode();
  if (Opc == TargetOpcode::COPY) {
    Register ToReg = UseMI.getOperand(0).getReg();
    const TargetRegisterClass *RC = nullptr;
    if (ToReg.isVirtual())
      RC = MRI->getRegClass(ToReg);
    bool GR32Reg = (ToReg.isVirtual() && X86::GR32RegClass.hasSubClassEq(RC)) ||
                   (ToReg.isPhysical() && X86::GR32RegClass.contains(ToReg));
    bool GR64Reg = (ToReg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC)) ||
                   (ToReg.isPhysical() && X86::GR64RegClass.contains(ToReg));
    bool GR8Reg = (ToReg.isVirtual() && X86::GR8RegClass.hasSubClassEq(RC)) ||
                  (ToReg.isPhysical() && X86::GR8RegClass.contains(ToReg));

    if (GR64Reg) {
      if (isUInt<32>(ImmVal))
        NewOpc = X86::MOV32ri64;
      else
        NewOpc = X86::MOV64ri;
    } else if (GR32Reg) {
      NewOpc = X86::MOV32ri;

        // MOV32r0 clobbers EFLAGS; only fold a zero if EFLAGS are dead here.
        if (UseMI.getParent()->computeRegisterLiveness(

        UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg));

    } else if (GR8Reg)
      NewOpc = X86::MOV8ri;

  // SUB/SBB only take the immediate as their second source operand.
  if ((NewOpc == X86::SUB64ri32 || NewOpc == X86::SUB32ri ||
       NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri ||
       NewOpc == X86::SUB64ri32_ND || NewOpc == X86::SUB32ri_ND ||
       NewOpc == X86::SBB64ri32_ND || NewOpc == X86::SBB32ri_ND) &&
      UseMI.findRegisterUseOperandIdx(Reg) != 2)
    return false;

  if ((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) &&
      UseMI.findRegisterUseOperandIdx(Reg) != 1)
    return false;

  using namespace X86;
  if (isSHL(Opc) || isSHR(Opc) || isSAR(Opc) || isROL(Opc) || isROR(Opc) ||
      isRCL(Opc) || isRCR(Opc)) {
    unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg);

    if (!isInt<8>(ImmVal))
      return false;

    UseMI.removeOperand(RegIdx);

  // An add of zero with dead EFLAGS degenerates into a plain COPY.
      UseMI.registerDefIsDead(X86::EFLAGS)) {

    UseMI.setDesc(get(TargetOpcode::COPY));
    UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg));
    UseMI.removeOperand(UseMI.findRegisterDefOperandIdx(X86::EFLAGS));
    UseMI.untieRegOperand(0);

  unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
  unsigned ImmOpNum = 2;
  if (!UseMI.getOperand(0).isDef()) {

  if (Opc == TargetOpcode::COPY)

    commuteInstruction(UseMI);

  UseMI.getOperand(ImmOpNum).ChangeToImmediate(ImmVal);

  if (Reg.isVirtual() && MRI->use_nodbg_empty(Reg))
bool X86InstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 Register Reg,
                                 MachineRegisterInfo *MRI) const {
  int64_t ImmVal;
  if (!getConstValDefinedInReg(DefMI, Reg, ImmVal))
    return false;
  return foldImmediateImpl(UseMI, &DefMI, Reg, ImmVal, MRI, true);
}
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
                             const MCInstrDesc &Desc) {
  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
  Register Reg = MIB.getReg(0);
  MIB->setDesc(Desc);
  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
  return true;
}

static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
                            Register Reg) {
  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
  MIB->setDesc(Desc);
  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
  return true;
}

static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
                          bool MinusOne) {
  // Turn the pseudo into an XOR that zeroes the register followed by an
  // INC/DEC of it.
  MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));

static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
                               const TargetInstrInfo &TII,
                               const X86Subtarget &Subtarget) {
  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");

  int StackAdjustment;

  if (Subtarget.is64Bit()) {
    assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
           MIB->getOpcode() == X86::MOV32ImmSExti8);

    // 64-bit mode: a PUSH/POP pair moves the stack pointer by 8 bytes.
    StackAdjustment = 8;

    // 32-bit mode: the pair moves the stack pointer by 4 bytes.
    StackAdjustment = 4;

  bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;

static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
  unsigned XorOp =
      MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
  MIB->setDesc(TII.get(XorOp));
static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
                            const TargetRegisterInfo *TRI,
                            const MCInstrDesc &LoadDesc,
                            const MCInstrDesc &BroadcastDesc, unsigned SubIdx) {
  Register DestReg = MIB.getReg(0);
  // Check if DestReg is XMM16-31 or YMM16-31.
  if (TRI->getEncodingValue(DestReg) < 16) {
    // We can use a normal VEX encoded load.
    MIB->setDesc(LoadDesc);
  } else {
    // Use a 128/256-bit VBROADCAST with the destination widened to a
    // 512-bit register, which only needs an EVEX encoding.
    MIB->setDesc(BroadcastDesc);
    DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
    MIB->getOperand(0).setReg(DestReg);
  }
  return true;
}

static bool expandNOVLXStore(MachineInstrBuilder &MIB,
                             const TargetRegisterInfo *TRI,
                             const MCInstrDesc &StoreDesc,
                             const MCInstrDesc &ExtractDesc, unsigned SubIdx) {
  Register SrcReg = MIB.getReg(X86::AddrNumOperands);
  // Check if SrcReg is XMM16-31 or YMM16-31.
  if (TRI->getEncodingValue(SrcReg) < 16) {
    // We can use a normal VEX encoded store.
    MIB->setDesc(StoreDesc);
  } else {
    // Use a VEXTRACTF instruction from the widened 512-bit source, which
    // only needs an EVEX encoding.
    MIB->setDesc(ExtractDesc);
    SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
    MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
    MIB.addImm(0x0); // Append immediate to extract from the lower bits.
  }
  return true;
}
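// expandPostRAPseudo lowers the zero/all-ones idioms and other pseudo
// instructions that survive register allocation into real machine
// instructions (XOR/PCMPEQ/VPTERNLOG and friends).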
bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  bool HasAVX = Subtarget.hasAVX();
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
  switch (MI.getOpcode()) {

  case X86::MOV32ImmSExti8:
  case X86::MOV64ImmSExti8:
    return ExpandMOVImmSExti8(MIB, *this, Subtarget);
  case X86::SETB_C32r:
    return Expand2AddrUndef(MIB, get(X86::SBB32rr));
  case X86::SETB_C64r:
    return Expand2AddrUndef(MIB, get(X86::SBB64rr));

  case X86::V_SET0:
  case X86::FsFLD0SS:
  case X86::FsFLD0SD:
  case X86::FsFLD0F128:
    return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
  case X86::AVX_SET0: {
    assert(HasAVX && "AVX not supported");
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    Register SrcReg = MIB.getReg(0);
    Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
    MIB->getOperand(0).setReg(XReg);
    Expand2AddrUndef(MIB, get(X86::VXORPSrr));
    MIB.addReg(SrcReg, RegState::ImplicitDefine);
    return true;
  }
  case X86::AVX512_128_SET0:
  case X86::AVX512_FsFLD0SH:
  case X86::AVX512_FsFLD0SS:
  case X86::AVX512_FsFLD0SD:
  case X86::AVX512_FsFLD0F128: {
    bool HasVLX = Subtarget.hasVLX();
    Register SrcReg = MIB.getReg(0);
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
      return Expand2AddrUndef(MIB,
                              get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
    // Extended register without VLX: use a larger XOR.
    SrcReg =
        TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
    MIB->getOperand(0).setReg(SrcReg);
    return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
  }
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0: {
    bool HasVLX = Subtarget.hasVLX();
    Register SrcReg = MIB.getReg(0);
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
      Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
      MIB->getOperand(0).setReg(XReg);
      Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
      MIB.addReg(SrcReg, RegState::ImplicitDefine);
      return true;
    }
    if (MI.getOpcode() == X86::AVX512_256_SET0) {
      // No VLX, so we must reference a zmm.
      Register ZReg =
          TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
      MIB->getOperand(0).setReg(ZReg);
    }
    return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
  }
  case X86::V_SETALLONES:
    return Expand2AddrUndef(MIB,
                            get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
  case X86::AVX2_SETALLONES:
    return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
  case X86::AVX1_SETALLONES: {
    Register Reg = MIB.getReg(0);
    // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
    MIB->setDesc(get(X86::VCMPPSYrri));
    MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
    return true;
  }
  case X86::AVX512_512_SETALLONES: {
    Register Reg = MIB.getReg(0);
    MIB->setDesc(get(X86::VPTERNLOGDZrri));
    // VPTERNLOGD needs 3 register inputs and an immediate.
    // 0xff will return 1s for any input.
    MIB.addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef)
        .addImm(0xff);
    return true;
  }
  case X86::AVX512_512_SEXT_MASK_32:
  case X86::AVX512_512_SEXT_MASK_64: {
    Register Reg = MIB.getReg(0);
    Register MaskReg = MIB.getReg(1);
    unsigned MaskState = getRegState(MIB->getOperand(1));
    unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64)
                       ? X86::VPTERNLOGQZrrikz
                       : X86::VPTERNLOGDZrrikz;
    MI.removeOperand(1);
    MIB->setDesc(get(Opc));
    // VPTERNLOG needs 3 register inputs and an immediate.
    // 0xff will return 1s for any input.
    MIB.addReg(Reg, RegState::Undef)
        .addReg(MaskReg, MaskState)
        .addReg(Reg, RegState::Undef)
        .addImm(0xff);
    return true;
  }
  case X86::VMOVAPSZ128rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
  case X86::VMOVUPSZ128rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
  case X86::VMOVAPSZ256rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
  case X86::VMOVUPSZ256rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
  case X86::VMOVAPSZ128mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
                            get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
  case X86::VMOVUPSZ128mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
                            get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
  case X86::VMOVAPSZ256mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
                            get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
  case X86::VMOVUPSZ256mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
                            get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
  case X86::MOV32ri64: {
    Register Reg = MIB.getReg(0);
    Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
    MI.setDesc(get(X86::MOV32ri));
    MIB->getOperand(0).setReg(Reg32);
    MIB.addReg(Reg, RegState::ImplicitDefine);
    return true;
  }
  case X86::RDFLAGS32:
  case X86::RDFLAGS64: {
    unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
    MachineBasicBlock &MBB = *MIB->getParent();
    MachineInstr *NewMI = BuildMI(MBB, MI, MIB->getDebugLoc(),
                                  get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
                              .getInstr();
    // Permit reads of the EFLAGS and DF registers without them being defined.
    assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
           "Unexpected register in operand! Should be EFLAGS.");
    NewMI->getOperand(2).setIsUndef();
    assert(NewMI->getOperand(3).getReg() == X86::DF &&
           "Unexpected register in operand! Should be DF.");
    NewMI->getOperand(3).setIsUndef();

    MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
    return true;
  }
  case X86::WRFLAGS32:
  case X86::WRFLAGS64: {
    unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
    MachineBasicBlock &MBB = *MIB->getParent();
    BuildMI(MBB, MI, MIB->getDebugLoc(),
            get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
        .addReg(MI.getOperand(0).getReg());
    BuildMI(MBB, MI, MIB->getDebugLoc(),
            get(Is64Bit ? X86::POPF64 : X86::POPF32));
    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD:
    expandLoadStackGuard(MIB, *this);
    return true;
  case X86::XOR64_FP:
  case X86::XOR32_FP:
    return expandXorFP(MIB, *this);
  case X86::SHLDROT32ri:
    return expandSHXDROT(MIB, get(X86::SHLD32rri8));
  case X86::SHLDROT64ri:
    return expandSHXDROT(MIB, get(X86::SHLD64rri8));
  case X86::SHRDROT32ri:
    return expandSHXDROT(MIB, get(X86::SHRD32rri8));
  case X86::SHRDROT64ri:
    return expandSHXDROT(MIB, get(X86::SHRD64rri8));
  case X86::ADD8rr_DB:
    MIB->setDesc(get(X86::OR8rr));
    break;
  case X86::ADD16rr_DB:
    MIB->setDesc(get(X86::OR16rr));
    break;
  case X86::ADD32rr_DB:
    MIB->setDesc(get(X86::OR32rr));
    break;
  case X86::ADD64rr_DB:
    MIB->setDesc(get(X86::OR64rr));
    break;
  case X86::ADD8ri_DB:
    MIB->setDesc(get(X86::OR8ri));
    break;
  case X86::ADD16ri_DB:
    MIB->setDesc(get(X86::OR16ri));
    break;
  case X86::ADD32ri_DB:
    MIB->setDesc(get(X86::OR32ri));
    break;
  case X86::ADD64ri32_DB:
    MIB->setDesc(get(X86::OR64ri32));
    break;
  }
  return false;
}
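// The predicate below identifies SSE/AVX instructions that only write part
// of their destination register (or, on some tunings, carry a false
// dependency on it). BreakFalseDeps uses it to decide when a
// dependency-breaking idiom is worth inserting.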
static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget,
                                bool ForLoadFold = false) {
  switch (Opcode) {
  case X86::CVTSI2SSrr:
  case X86::CVTSI2SSrm:
  case X86::CVTSI642SSrr:
  case X86::CVTSI642SSrm:
  case X86::CVTSI2SDrr:
  case X86::CVTSI2SDrm:
  case X86::CVTSI642SDrr:
  case X86::CVTSI642SDrm:
    // Load folding won't affect the undef register update since the input is
    // a GPR.
    return !ForLoadFold;
  case X86::CVTSD2SSrr:
  case X86::CVTSD2SSrm:
  case X86::CVTSS2SDrr:
  case X86::CVTSS2SDrm:

  case X86::RCPSSr_Int:
  case X86::RCPSSm_Int:

  case X86::RSQRTSSr_Int:
  case X86::RSQRTSSm_Int:

  case X86::SQRTSSr_Int:
  case X86::SQRTSSm_Int:

  case X86::SQRTSDr_Int:
  case X86::SQRTSDm_Int:
    return true;
  case X86::VFCMULCPHZ128rm:
  case X86::VFCMULCPHZ128rmb:
  case X86::VFCMULCPHZ128rmbkz:
  case X86::VFCMULCPHZ128rmkz:
  case X86::VFCMULCPHZ128rr:
  case X86::VFCMULCPHZ128rrkz:
  case X86::VFCMULCPHZ256rm:
  case X86::VFCMULCPHZ256rmb:
  case X86::VFCMULCPHZ256rmbkz:
  case X86::VFCMULCPHZ256rmkz:
  case X86::VFCMULCPHZ256rr:
  case X86::VFCMULCPHZ256rrkz:
  case X86::VFCMULCPHZrm:
  case X86::VFCMULCPHZrmb:
  case X86::VFCMULCPHZrmbkz:
  case X86::VFCMULCPHZrmkz:
  case X86::VFCMULCPHZrr:
  case X86::VFCMULCPHZrrb:
  case X86::VFCMULCPHZrrbkz:
  case X86::VFCMULCPHZrrkz:
  case X86::VFMULCPHZ128rm:
  case X86::VFMULCPHZ128rmb:
  case X86::VFMULCPHZ128rmbkz:
  case X86::VFMULCPHZ128rmkz:
  case X86::VFMULCPHZ128rr:
  case X86::VFMULCPHZ128rrkz:
  case X86::VFMULCPHZ256rm:
  case X86::VFMULCPHZ256rmb:
  case X86::VFMULCPHZ256rmbkz:
  case X86::VFMULCPHZ256rmkz:
  case X86::VFMULCPHZ256rr:
  case X86::VFMULCPHZ256rrkz:
  case X86::VFMULCPHZrm:
  case X86::VFMULCPHZrmb:
  case X86::VFMULCPHZrmbkz:
  case X86::VFMULCPHZrmkz:
  case X86::VFMULCPHZrr:
  case X86::VFMULCPHZrrb:
  case X86::VFMULCPHZrrbkz:
  case X86::VFMULCPHZrrkz:
  case X86::VFCMULCSHZrm:
  case X86::VFCMULCSHZrmkz:
  case X86::VFCMULCSHZrr:
  case X86::VFCMULCSHZrrb:
  case X86::VFCMULCSHZrrbkz:
  case X86::VFCMULCSHZrrkz:
  case X86::VFMULCSHZrm:
  case X86::VFMULCSHZrmkz:
  case X86::VFMULCSHZrr:
  case X86::VFMULCSHZrrb:
  case X86::VFMULCSHZrrbkz:
  case X86::VFMULCSHZrrkz:
    return Subtarget.hasMULCFalseDeps();
  case X86::VPERMDYrm:
  case X86::VPERMDYrr:
  case X86::VPERMQYmi:
  case X86::VPERMQYri:
  case X86::VPERMPSYrm:
  case X86::VPERMPSYrr:
  case X86::VPERMPDYmi:
  case X86::VPERMPDYri:
  case X86::VPERMDZ256rm:
  case X86::VPERMDZ256rmb:
  case X86::VPERMDZ256rmbkz:
  case X86::VPERMDZ256rmkz:
  case X86::VPERMDZ256rr:
  case X86::VPERMDZ256rrkz:
  case X86::VPERMDZrm:
  case X86::VPERMDZrmb:
  case X86::VPERMDZrmbkz:
  case X86::VPERMDZrmkz:
  case X86::VPERMDZrr:
  case X86::VPERMDZrrkz:
  case X86::VPERMQZ256mbi:
  case X86::VPERMQZ256mbikz:
  case X86::VPERMQZ256mi:
  case X86::VPERMQZ256mikz:
  case X86::VPERMQZ256ri:
  case X86::VPERMQZ256rikz:
  case X86::VPERMQZ256rm:
  case X86::VPERMQZ256rmb:
  case X86::VPERMQZ256rmbkz:
  case X86::VPERMQZ256rmkz:
  case X86::VPERMQZ256rr:
  case X86::VPERMQZ256rrkz:
  case X86::VPERMQZmbi:
  case X86::VPERMQZmbikz:
  case X86::VPERMQZmi:
  case X86::VPERMQZmikz:
  case X86::VPERMQZri:
  case X86::VPERMQZrikz:
  case X86::VPERMQZrm:
  case X86::VPERMQZrmb:
  case X86::VPERMQZrmbkz:
  case X86::VPERMQZrmkz:
  case X86::VPERMQZrr:
  case X86::VPERMQZrrkz:
  case X86::VPERMPSZ256rm:
  case X86::VPERMPSZ256rmb:
  case X86::VPERMPSZ256rmbkz:
  case X86::VPERMPSZ256rmkz:
  case X86::VPERMPSZ256rr:
  case X86::VPERMPSZ256rrkz:
  case X86::VPERMPSZrm:
  case X86::VPERMPSZrmb:
  case X86::VPERMPSZrmbkz:
  case X86::VPERMPSZrmkz:
  case X86::VPERMPSZrr:
  case X86::VPERMPSZrrkz:
  case X86::VPERMPDZ256mbi:
  case X86::VPERMPDZ256mbikz:
  case X86::VPERMPDZ256mi:
  case X86::VPERMPDZ256mikz:
  case X86::VPERMPDZ256ri:
  case X86::VPERMPDZ256rikz:
  case X86::VPERMPDZ256rm:
  case X86::VPERMPDZ256rmb:
  case X86::VPERMPDZ256rmbkz:
  case X86::VPERMPDZ256rmkz:
  case X86::VPERMPDZ256rr:
  case X86::VPERMPDZ256rrkz:
  case X86::VPERMPDZmbi:
  case X86::VPERMPDZmbikz:
  case X86::VPERMPDZmi:
  case X86::VPERMPDZmikz:
  case X86::VPERMPDZri:
  case X86::VPERMPDZrikz:
  case X86::VPERMPDZrm:
  case X86::VPERMPDZrmb:
  case X86::VPERMPDZrmbkz:
  case X86::VPERMPDZrmkz:
  case X86::VPERMPDZrr:
  case X86::VPERMPDZrrkz:
    return Subtarget.hasPERMFalseDeps();
  case X86::VRANGEPDZ128rmbi:
  case X86::VRANGEPDZ128rmbikz:
  case X86::VRANGEPDZ128rmi:
  case X86::VRANGEPDZ128rmikz:
  case X86::VRANGEPDZ128rri:
  case X86::VRANGEPDZ128rrikz:
  case X86::VRANGEPDZ256rmbi:
  case X86::VRANGEPDZ256rmbikz:
  case X86::VRANGEPDZ256rmi:
  case X86::VRANGEPDZ256rmikz:
  case X86::VRANGEPDZ256rri:
  case X86::VRANGEPDZ256rrikz:
  case X86::VRANGEPDZrmbi:
  case X86::VRANGEPDZrmbikz:
  case X86::VRANGEPDZrmi:
  case X86::VRANGEPDZrmikz:
  case X86::VRANGEPDZrri:
  case X86::VRANGEPDZrrib:
  case X86::VRANGEPDZrribkz:
  case X86::VRANGEPDZrrikz:
  case X86::VRANGEPSZ128rmbi:
  case X86::VRANGEPSZ128rmbikz:
  case X86::VRANGEPSZ128rmi:
  case X86::VRANGEPSZ128rmikz:
  case X86::VRANGEPSZ128rri:
  case X86::VRANGEPSZ128rrikz:
  case X86::VRANGEPSZ256rmbi:
  case X86::VRANGEPSZ256rmbikz:
  case X86::VRANGEPSZ256rmi:
  case X86::VRANGEPSZ256rmikz:
  case X86::VRANGEPSZ256rri:
  case X86::VRANGEPSZ256rrikz:
  case X86::VRANGEPSZrmbi:
  case X86::VRANGEPSZrmbikz:
  case X86::VRANGEPSZrmi:
  case X86::VRANGEPSZrmikz:
  case X86::VRANGEPSZrri:
  case X86::VRANGEPSZrrib:
  case X86::VRANGEPSZrribkz:
  case X86::VRANGEPSZrrikz:
  case X86::VRANGESDZrmi:
  case X86::VRANGESDZrmikz:
  case X86::VRANGESDZrri:
  case X86::VRANGESDZrrib:
  case X86::VRANGESDZrribkz:
  case X86::VRANGESDZrrikz:
  case X86::VRANGESSZrmi:
  case X86::VRANGESSZrmikz:
  case X86::VRANGESSZrri:
  case X86::VRANGESSZrrib:
  case X86::VRANGESSZrribkz:
  case X86::VRANGESSZrrikz:
    return Subtarget.hasRANGEFalseDeps();
  case X86::VGETMANTSSZrmi:
  case X86::VGETMANTSSZrmikz:
  case X86::VGETMANTSSZrri:
  case X86::VGETMANTSSZrrib:
  case X86::VGETMANTSSZrribkz:
  case X86::VGETMANTSSZrrikz:
  case X86::VGETMANTSDZrmi:
  case X86::VGETMANTSDZrmikz:
  case X86::VGETMANTSDZrri:
  case X86::VGETMANTSDZrrib:
  case X86::VGETMANTSDZrribkz:
  case X86::VGETMANTSDZrrikz:
  case X86::VGETMANTSHZrmi:
  case X86::VGETMANTSHZrmikz:
  case X86::VGETMANTSHZrri:
  case X86::VGETMANTSHZrrib:
  case X86::VGETMANTSHZrribkz:
  case X86::VGETMANTSHZrrikz:
  case X86::VGETMANTPSZ128rmbi:
  case X86::VGETMANTPSZ128rmbikz:
  case X86::VGETMANTPSZ128rmi:
  case X86::VGETMANTPSZ128rmikz:
  case X86::VGETMANTPSZ256rmbi:
  case X86::VGETMANTPSZ256rmbikz:
  case X86::VGETMANTPSZ256rmi:
  case X86::VGETMANTPSZ256rmikz:
  case X86::VGETMANTPSZrmbi:
  case X86::VGETMANTPSZrmbikz:
  case X86::VGETMANTPSZrmi:
  case X86::VGETMANTPSZrmikz:
  case X86::VGETMANTPDZ128rmbi:
  case X86::VGETMANTPDZ128rmbikz:
  case X86::VGETMANTPDZ128rmi:
  case X86::VGETMANTPDZ128rmikz:
  case X86::VGETMANTPDZ256rmbi:
  case X86::VGETMANTPDZ256rmbikz:
  case X86::VGETMANTPDZ256rmi:
  case X86::VGETMANTPDZ256rmikz:
  case X86::VGETMANTPDZrmbi:
  case X86::VGETMANTPDZrmbikz:
  case X86::VGETMANTPDZrmi:
  case X86::VGETMANTPDZrmikz:
    return Subtarget.hasGETMANTFalseDeps();
  case X86::VPMULLQZ128rm:
  case X86::VPMULLQZ128rmb:
  case X86::VPMULLQZ128rmbkz:
  case X86::VPMULLQZ128rmkz:
  case X86::VPMULLQZ128rr:
  case X86::VPMULLQZ128rrkz:
  case X86::VPMULLQZ256rm:
  case X86::VPMULLQZ256rmb:
  case X86::VPMULLQZ256rmbkz:
  case X86::VPMULLQZ256rmkz:
  case X86::VPMULLQZ256rr:
  case X86::VPMULLQZ256rrkz:
  case X86::VPMULLQZrm:
  case X86::VPMULLQZrmb:
  case X86::VPMULLQZrmbkz:
  case X86::VPMULLQZrmkz:
  case X86::VPMULLQZrr:
  case X86::VPMULLQZrrkz:
    return Subtarget.hasMULLQFalseDeps();

  case X86::POPCNT32rm:
  case X86::POPCNT32rr:
  case X86::POPCNT64rm:
  case X86::POPCNT64rr:
    return Subtarget.hasPOPCNTFalseDeps();
  case X86::LZCNT32rm:
  case X86::LZCNT32rr:
  case X86::LZCNT64rm:
  case X86::LZCNT64rr:
  case X86::TZCNT32rm:
  case X86::TZCNT32rr:
  case X86::TZCNT64rm:
  case X86::TZCNT64rr:
    return Subtarget.hasLZCNTFalseDeps();
  }

  return false;
}
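// A partial register update leaves a false dependency on the old value of
// the destination. The clearance returned below tells the post-RA pass how
// many idle cycles must separate the last write from this instruction before
// a dependency-breaking XOR is worthwhile.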
  // If MI is marked as reading Reg, the partial register update is wanted.
  const MachineOperand &MO = MI.getOperand(0);
  Register Reg = MO.getReg();
  if (Reg.isVirtual()) {
    if (MO.readsReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else {
    if (MI.readsRegister(Reg, TRI))
      return 0;
  }

  // If any instructions in the clearance range are reading Reg, insert a
  // dependency breaking instruction, which is inexpensive and is likely to
  // be hidden in other instruction's cycles.
  return PartialRegUpdateClearance;
}
static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
                              bool ForLoadFold = false) {
  switch (Opcode) {
  case X86::MMX_PUNPCKHBWrr:
  case X86::MMX_PUNPCKHWDrr:
  case X86::MMX_PUNPCKHDQrr:
  case X86::MMX_PUNPCKLBWrr:
  case X86::MMX_PUNPCKLWDrr:
  case X86::MMX_PUNPCKLDQrr:
  case X86::MOVHLPSrr:
  case X86::PACKSSWBrr:
  case X86::PACKUSWBrr:
  case X86::PACKSSDWrr:
  case X86::PACKUSDWrr:
  case X86::PUNPCKHBWrr:
  case X86::PUNPCKLBWrr:
  case X86::PUNPCKHWDrr:
  case X86::PUNPCKLWDrr:
  case X86::PUNPCKHDQrr:
  case X86::PUNPCKLDQrr:
  case X86::PUNPCKHQDQrr:
  case X86::PUNPCKLQDQrr:
  case X86::SHUFPDrri:
  case X86::SHUFPSrri:
    return OpNum == 2 && !ForLoadFold;

  case X86::VMOVLHPSrr:
  case X86::VMOVLHPSZrr:
  case X86::VPACKSSWBrr:
  case X86::VPACKUSWBrr:
  case X86::VPACKSSDWrr:
  case X86::VPACKUSDWrr:
  case X86::VPACKSSWBZ128rr:
  case X86::VPACKUSWBZ128rr:
  case X86::VPACKSSDWZ128rr:
  case X86::VPACKUSDWZ128rr:
  case X86::VPERM2F128rr:
  case X86::VPERM2I128rr:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF32X4Zrri:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFF64X2Zrri:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI32X4Zrri:
  case X86::VSHUFI64X2Z256rri:
  case X86::VSHUFI64X2Zrri:
  case X86::VPUNPCKHBWrr:
  case X86::VPUNPCKLBWrr:
  case X86::VPUNPCKHBWYrr:
  case X86::VPUNPCKLBWYrr:
  case X86::VPUNPCKHBWZ128rr:
  case X86::VPUNPCKLBWZ128rr:
  case X86::VPUNPCKHBWZ256rr:
  case X86::VPUNPCKLBWZ256rr:
  case X86::VPUNPCKHBWZrr:
  case X86::VPUNPCKLBWZrr:
  case X86::VPUNPCKHWDrr:
  case X86::VPUNPCKLWDrr:
  case X86::VPUNPCKHWDYrr:
  case X86::VPUNPCKLWDYrr:
  case X86::VPUNPCKHWDZ128rr:
  case X86::VPUNPCKLWDZ128rr:
  case X86::VPUNPCKHWDZ256rr:
  case X86::VPUNPCKLWDZ256rr:
  case X86::VPUNPCKHWDZrr:
  case X86::VPUNPCKLWDZrr:
  case X86::VPUNPCKHDQrr:
  case X86::VPUNPCKLDQrr:
  case X86::VPUNPCKHDQYrr:
  case X86::VPUNPCKLDQYrr:
  case X86::VPUNPCKHDQZ128rr:
  case X86::VPUNPCKLDQZ128rr:
  case X86::VPUNPCKHDQZ256rr:
  case X86::VPUNPCKLDQZ256rr:
  case X86::VPUNPCKHDQZrr:
  case X86::VPUNPCKLDQZrr:
  case X86::VPUNPCKHQDQrr:
  case X86::VPUNPCKLQDQrr:
  case X86::VPUNPCKHQDQYrr:
  case X86::VPUNPCKLQDQYrr:
  case X86::VPUNPCKHQDQZ128rr:
  case X86::VPUNPCKLQDQZ128rr:
  case X86::VPUNPCKHQDQZ256rr:
  case X86::VPUNPCKLQDQZ256rr:
  case X86::VPUNPCKHQDQZrr:
  case X86::VPUNPCKLQDQZrr:
    return (OpNum == 1 || OpNum == 2) && !ForLoadFold;

  case X86::VCVTSI2SSrr:
  case X86::VCVTSI2SSrm:
  case X86::VCVTSI2SSrr_Int:
  case X86::VCVTSI2SSrm_Int:
  case X86::VCVTSI642SSrr:
  case X86::VCVTSI642SSrm:
  case X86::VCVTSI642SSrr_Int:
  case X86::VCVTSI642SSrm_Int:
  case X86::VCVTSI2SDrr:
  case X86::VCVTSI2SDrm:
  case X86::VCVTSI2SDrr_Int:
  case X86::VCVTSI2SDrm_Int:
  case X86::VCVTSI642SDrr:
  case X86::VCVTSI642SDrm:
  case X86::VCVTSI642SDrr_Int:
  case X86::VCVTSI642SDrm_Int:
  // AVX-512
  case X86::VCVTSI2SSZrr:
  case X86::VCVTSI2SSZrm:
  case X86::VCVTSI2SSZrr_Int:
  case X86::VCVTSI2SSZrrb_Int:
  case X86::VCVTSI2SSZrm_Int:
  case X86::VCVTSI642SSZrr:
  case X86::VCVTSI642SSZrm:
  case X86::VCVTSI642SSZrr_Int:
  case X86::VCVTSI642SSZrrb_Int:
  case X86::VCVTSI642SSZrm_Int:
  case X86::VCVTSI2SDZrr:
  case X86::VCVTSI2SDZrm:
  case X86::VCVTSI2SDZrr_Int:
  case X86::VCVTSI2SDZrm_Int:
  case X86::VCVTSI642SDZrr:
  case X86::VCVTSI642SDZrm:
  case X86::VCVTSI642SDZrr_Int:
  case X86::VCVTSI642SDZrrb_Int:
  case X86::VCVTSI642SDZrm_Int:
  case X86::VCVTUSI2SSZrr:
  case X86::VCVTUSI2SSZrm:
  case X86::VCVTUSI2SSZrr_Int:
  case X86::VCVTUSI2SSZrrb_Int:
  case X86::VCVTUSI2SSZrm_Int:
  case X86::VCVTUSI642SSZrr:
  case X86::VCVTUSI642SSZrm:
  case X86::VCVTUSI642SSZrr_Int:
  case X86::VCVTUSI642SSZrrb_Int:
  case X86::VCVTUSI642SSZrm_Int:
  case X86::VCVTUSI2SDZrr:
  case X86::VCVTUSI2SDZrm:
  case X86::VCVTUSI2SDZrr_Int:
  case X86::VCVTUSI2SDZrm_Int:
  case X86::VCVTUSI642SDZrr:
  case X86::VCVTUSI642SDZrm:
  case X86::VCVTUSI642SDZrr_Int:
  case X86::VCVTUSI642SDZrrb_Int:
  case X86::VCVTUSI642SDZrm_Int:
  case X86::VCVTSI2SHZrr:
  case X86::VCVTSI2SHZrm:
  case X86::VCVTSI2SHZrr_Int:
  case X86::VCVTSI2SHZrrb_Int:
  case X86::VCVTSI2SHZrm_Int:
  case X86::VCVTSI642SHZrr:
  case X86::VCVTSI642SHZrm:
  case X86::VCVTSI642SHZrr_Int:
  case X86::VCVTSI642SHZrrb_Int:
  case X86::VCVTSI642SHZrm_Int:
  case X86::VCVTUSI2SHZrr:
  case X86::VCVTUSI2SHZrm:
  case X86::VCVTUSI2SHZrr_Int:
  case X86::VCVTUSI2SHZrrb_Int:
  case X86::VCVTUSI2SHZrm_Int:
  case X86::VCVTUSI642SHZrr:
  case X86::VCVTUSI642SHZrm:
  case X86::VCVTUSI642SHZrr_Int:
  case X86::VCVTUSI642SHZrrb_Int:
  case X86::VCVTUSI642SHZrm_Int:
    // Load folding won't affect the undef register update since the input is
    // a GPR.
    return OpNum == 1 && !ForLoadFold;
  case X86::VCVTSD2SSrr:
  case X86::VCVTSD2SSrm:
  case X86::VCVTSD2SSrr_Int:
  case X86::VCVTSD2SSrm_Int:
  case X86::VCVTSS2SDrr:
  case X86::VCVTSS2SDrm:
  case X86::VCVTSS2SDrr_Int:
  case X86::VCVTSS2SDrm_Int:

  case X86::VRCPSSr_Int:

  case X86::VRCPSSm_Int:
  case X86::VROUNDSDr:
  case X86::VROUNDSDm:
  case X86::VROUNDSDr_Int:
  case X86::VROUNDSDm_Int:
  case X86::VROUNDSSr:
  case X86::VROUNDSSm:
  case X86::VROUNDSSr_Int:
  case X86::VROUNDSSm_Int:
  case X86::VRSQRTSSr:
  case X86::VRSQRTSSr_Int:
  case X86::VRSQRTSSm:
  case X86::VRSQRTSSm_Int:

  case X86::VSQRTSSr_Int:

  case X86::VSQRTSSm_Int:

  case X86::VSQRTSDr_Int:

  case X86::VSQRTSDm_Int:
  // AVX-512
  case X86::VCVTSD2SSZrr:
  case X86::VCVTSD2SSZrr_Int:
  case X86::VCVTSD2SSZrrb_Int:
  case X86::VCVTSD2SSZrm:
  case X86::VCVTSD2SSZrm_Int:
  case X86::VCVTSS2SDZrr:
  case X86::VCVTSS2SDZrr_Int:
  case X86::VCVTSS2SDZrrb_Int:
  case X86::VCVTSS2SDZrm:
  case X86::VCVTSS2SDZrm_Int:
  case X86::VGETEXPSDZr:
  case X86::VGETEXPSDZrb:
  case X86::VGETEXPSDZm:
  case X86::VGETEXPSSZr:
  case X86::VGETEXPSSZrb:
  case X86::VGETEXPSSZm:
  case X86::VGETMANTSDZrri:
  case X86::VGETMANTSDZrrib:
  case X86::VGETMANTSDZrmi:
  case X86::VGETMANTSSZrri:
  case X86::VGETMANTSSZrrib:
  case X86::VGETMANTSSZrmi:
  case X86::VRNDSCALESDZr:
  case X86::VRNDSCALESDZr_Int:
  case X86::VRNDSCALESDZrb_Int:
  case X86::VRNDSCALESDZm:
  case X86::VRNDSCALESDZm_Int:
  case X86::VRNDSCALESSZr:
  case X86::VRNDSCALESSZr_Int:
  case X86::VRNDSCALESSZrb_Int:
  case X86::VRNDSCALESSZm:
  case X86::VRNDSCALESSZm_Int:
  case X86::VRCP14SDZrr:
  case X86::VRCP14SDZrm:
  case X86::VRCP14SSZrr:
  case X86::VRCP14SSZrm:
  case X86::VRCPSHZrr:
  case X86::VRCPSHZrm:
  case X86::VRSQRTSHZrr:
  case X86::VRSQRTSHZrm:
  case X86::VREDUCESHZrmi:
  case X86::VREDUCESHZrri:
  case X86::VREDUCESHZrrib:
  case X86::VGETEXPSHZr:
  case X86::VGETEXPSHZrb:
  case X86::VGETEXPSHZm:
  case X86::VGETMANTSHZrri:
  case X86::VGETMANTSHZrrib:
  case X86::VGETMANTSHZrmi:
  case X86::VRNDSCALESHZr:
  case X86::VRNDSCALESHZr_Int:
  case X86::VRNDSCALESHZrb_Int:
  case X86::VRNDSCALESHZm:
  case X86::VRNDSCALESHZm_Int:
  case X86::VSQRTSHZr:
  case X86::VSQRTSHZr_Int:
  case X86::VSQRTSHZrb_Int:
  case X86::VSQRTSHZm:
  case X86::VSQRTSHZm_Int:
  case X86::VRCP28SDZr:
  case X86::VRCP28SDZrb:
  case X86::VRCP28SDZm:
  case X86::VRCP28SSZr:
  case X86::VRCP28SSZrb:
  case X86::VRCP28SSZm:
  case X86::VREDUCESSZrmi:
  case X86::VREDUCESSZrri:
  case X86::VREDUCESSZrrib:
  case X86::VRSQRT14SDZrr:
  case X86::VRSQRT14SDZrm:
  case X86::VRSQRT14SSZrr:
  case X86::VRSQRT14SSZrm:
  case X86::VRSQRT28SDZr:
  case X86::VRSQRT28SDZrb:
  case X86::VRSQRT28SDZm:
  case X86::VRSQRT28SSZr:
  case X86::VRSQRT28SSZrb:
  case X86::VRSQRT28SSZm:
  case X86::VSQRTSSZr:
  case X86::VSQRTSSZr_Int:
  case X86::VSQRTSSZrb_Int:
  case X86::VSQRTSSZm:
  case X86::VSQRTSSZm_Int:
  case X86::VSQRTSDZr:
  case X86::VSQRTSDZr_Int:
  case X86::VSQRTSDZrb_Int:
  case X86::VSQRTSDZm:
  case X86::VSQRTSDZm_Int:
  case X86::VCVTSD2SHZrr:
  case X86::VCVTSD2SHZrr_Int:
  case X86::VCVTSD2SHZrrb_Int:
  case X86::VCVTSD2SHZrm:
  case X86::VCVTSD2SHZrm_Int:
  case X86::VCVTSS2SHZrr:
  case X86::VCVTSS2SHZrr_Int:
  case X86::VCVTSS2SHZrrb_Int:
  case X86::VCVTSS2SHZrm:
  case X86::VCVTSS2SHZrm_Int:
  case X86::VCVTSH2SDZrr:
  case X86::VCVTSH2SDZrr_Int:
  case X86::VCVTSH2SDZrrb_Int:
  case X86::VCVTSH2SDZrm:
  case X86::VCVTSH2SDZrm_Int:
  case X86::VCVTSH2SSZrr:
  case X86::VCVTSH2SSZrr_Int:
  case X86::VCVTSH2SSZrrb_Int:
  case X86::VCVTSH2SSZrm:
  case X86::VCVTSH2SSZrm_Int:
    return OpNum == 1;
  case X86::VMOVSSZrrk:
  case X86::VMOVSDZrrk:
    return OpNum == 3 && !ForLoadFold;
  case X86::VMOVSSZrrkz:
  case X86::VMOVSDZrrkz:
    return OpNum == 2 && !ForLoadFold;
  }

  return false;
}
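// breakPartialRegDependency inserts a self-XOR (XORPS/VPXORD/XOR32rr
// depending on the register class) before MI so the partial write no longer
// depends on the stale value of the full register.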
void X86InstrInfo::breakPartialRegDependency(
    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
  Register Reg = MI.getOperand(OpNum).getReg();
  // If MI kills this register, the false dependence is already broken.
  if (MI.killsRegister(Reg, TRI))
    return;

  if (X86::VR128RegClass.contains(Reg)) {
    // These instructions are all floating point domain, so xorps is the best
    // choice.
    unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::VR256RegClass.contains(Reg)) {
    // Use vxorps to clear the full ymm register.
    // It wants to read and write the xmm sub-register.
    Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::VR128XRegClass.contains(Reg)) {
    // Only handle VLX targets.
    if (!Subtarget.hasVLX())
      return;
    // Since vxorps requires AVX512DQ, vpxord should be the best choice.
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::VR256XRegClass.contains(Reg) ||
             X86::VR512RegClass.contains(Reg)) {
    // Only handle VLX targets.
    if (!Subtarget.hasVLX())
      return;
    // Use vpxord to clear the full ymm/zmm register.
    // It wants to read and write the xmm sub-register.
    Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::GR64RegClass.contains(Reg)) {
    // Use XOR32rr; it has a shorter encoding and zeros the upper bits too.
    Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::GR32RegClass.contains(Reg)) {
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  }
}
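// The helpers below stitch a memory reference (a frame index or a full
// 5-operand x86 address) into a newly built instruction when folding a load
// or store.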
static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
                        int PtrOffset = 0) {
  unsigned NumAddrOps = MOs.size();

  if (NumAddrOps < 4) {
    // FrameIndex only - add an immediate offset (whether it is zero or not).
    for (unsigned i = 0; i != NumAddrOps; ++i)
      MIB.add(MOs[i]);
    addOffset(MIB, PtrOffset);
  } else {
    // General memory addressing - we need to add any offset to an existing
    // offset.
    assert(MOs.size() == 5 && "Unexpected memory operand list length");
    for (unsigned i = 0; i != NumAddrOps; ++i) {
      const MachineOperand &MO = MOs[i];
      if (i == 3 && PtrOffset != 0) {
        MIB.addDisp(MO, PtrOffset);
      } else {
        MIB.add(MO);
      }
    }
  }
}
static void updateOperandRegConstraints(MachineFunction &MF,
                                        MachineInstr &NewMI,
                                        const TargetInstrInfo &TII) {
  MachineRegisterInfo &MRI = MF.getRegInfo();

    // We only need to update constraints on virtual register operands.
    if (!Reg.isVirtual())
      continue;

    auto *NewRC = MRI.constrainRegClass(

      LLVM_DEBUG(
          dbgs() << "WARNING: Unable to update register constraint for operand "
                 << Idx << " of instruction:\n";
          NewMI.dump(); dbgs() << "\n");

static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
                                     ArrayRef<MachineOperand> MOs,
                                     MachineBasicBlock::iterator InsertPt,
                                     MachineInstr &MI,
                                     const TargetInstrInfo &TII) {
  // Omit the implicit operands, something BuildMI can't do.
  unsigned NumOps = MI.getDesc().getNumOperands() - 2;
  for (unsigned i = 0; i != NumOps; ++i) {
static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
                              unsigned OpNo, ArrayRef<MachineOperand> MOs,
                              MachineBasicBlock::iterator InsertPt,
                              MachineInstr &MI, const TargetInstrInfo &TII,
                              int PtrOffset = 0) {

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (i == OpNo) {
      assert(MO.isReg() && "Expected to fold into reg operand!");
      addOperands(MIB, MOs, PtrOffset);
    } else {
      MIB.add(MO);
    }
  }

static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
                                ArrayRef<MachineOperand> MOs,
                                MachineBasicBlock::iterator InsertPt,
                                MachineInstr &MI) {
  MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
                                    MI.getDebugLoc(), TII.get(Opcode));
  addOperands(MIB, MOs);
  return MIB.addImm(0);
}
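// foldMemoryOperandCustom handles folds that can't be table driven: INSERTPS
// can narrow the load to a single float at an adjusted offset, MOVHLPS can
// become MOVLPS loading from the high half, and UNPCKLPDrr can use MOVHPD
// when the memory is insufficiently aligned.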
  switch (MI.getOpcode()) {
  case X86::INSERTPSrr:
  case X86::VINSERTPSrr:
  case X86::VINSERTPSZrr:
    // Attempt to convert the load of the inserted vector into a fold load of
    // a single float.
    if (OpNum == 2) {
      unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
      unsigned ZMask = Imm & 15;
      unsigned DstIdx = (Imm >> 4) & 3;
      unsigned SrcIdx = (Imm >> 6) & 3;

      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      if ((Size == 0 || Size >= 16) && RCSize >= 16 &&
          (MI.getOpcode() != X86::INSERTPSrr || Alignment >= Align(4))) {
        int PtrOffset = SrcIdx * 4;
        unsigned NewImm = (DstIdx << 4) | ZMask;
        unsigned NewOpCode =
            (MI.getOpcode() == X86::VINSERTPSZrr)  ? X86::VINSERTPSZrm
            : (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm
                                                   : X86::INSERTPSrm;
        MachineInstr *NewMI =
            FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
        NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
        return NewMI;
      }
    }
    break;
  case X86::MOVHLPSrr:
  case X86::VMOVHLPSrr:
  case X86::VMOVHLPSZrr:
    // Move the upper 64-bits of the second operand to the lower 64-bits.
    // To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
    if (OpNum == 2) {
      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
        unsigned NewOpCode =
            (MI.getOpcode() == X86::VMOVHLPSZrr)  ? X86::VMOVLPSZ128rm
            : (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm
                                                  : X86::MOVLPSrm;
        return FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
      }
    }
    break;
  case X86::UNPCKLPDrr:
    // If we won't be able to fold this to the memory form of UNPCKL, use
    // MOVHPD instead.
    if (OpNum == 2) {
      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16))
        return FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
    }
    break;
  }

  return nullptr;
}
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
                                               MachineInstr &MI) {
  if (MF.getFunction().hasOptSize() || !hasUndefRegUpdate(MI.getOpcode(), 1) ||
      !MI.getOperand(1).isReg())
    return false;

  // If the instruction is at the post-RA stage, an undef read of the input
  // register is enough to know the value will not be live in.
  if (MI.getOperand(1).isUndef())
    return true;
unsigned X86InstrInfo::commuteOperandsForFold(MachineInstr &MI,
                                              unsigned Idx1) const {
  unsigned Idx2 = CommuteAnyOperandIndex;
  if (!findCommutedOpIndices(MI, Idx1, Idx2))
    return Idx1;

  bool HasDef = MI.getDesc().getNumDefs();
  Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
  Register Reg1 = MI.getOperand(Idx1).getReg();
  Register Reg2 = MI.getOperand(Idx2).getReg();
  bool Tied1 = 0 == MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO);
  bool Tied2 = 0 == MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO);

  // If either of the commutable operands is tied to the destination,
  // we cannot commute and fold.
  if ((HasDef && Reg0 == Reg1 && Tied1) || (HasDef && Reg0 == Reg2 && Tied2))
    return Idx1;

  return commuteInstruction(MI, false, Idx1, Idx2) ? Idx2 : Idx1;
}

static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx) {
  if (PrintFailedFusing && !MI.isCopy())
    dbgs() << "We failed to fuse operand " << Idx << " in " << MI;
}
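// The main fold entry point: reject unprofitable or unsafe cases, try the
// custom folds above, then consult the generated fold tables; optionally
// commute the instruction and retry before giving up.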
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
    ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
    unsigned Size, Align Alignment, bool AllowCommute) const {
  bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
  bool isTwoAddrFold = false;

  // For CPUs that favor the register form of a call or push, do not fold
  // loads into calls or pushes, unless optimizing for size aggressively.
  if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() &&
      (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r ||
       MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r ||
       MI.getOpcode() == X86::PUSH64r))
    return nullptr;

  unsigned NumOps = MI.getDesc().getNumOperands();
  bool isTwoAddr =
      NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;

  // FIXME: AsmPrinter doesn't know how to handle
  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
  if (MI.getOpcode() == X86::ADD32ri &&
      MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
    return nullptr;

      MI.getOpcode() != X86::ADD64rr)
    return nullptr;

  // Don't fold loads into indirect calls that need a KCFI check; they would
  // have to be unfolded again anyway.
  if (MI.isCall() && MI.getCFIType())
    return nullptr;

  // Attempt to fold any custom cases we have.
  if (MachineInstr *CustomMI = foldMemoryOperandCustom(
          MF, MI, OpNum, MOs, InsertPt, Size, Alignment))
    return CustomMI;

  // Folding a memory location into the two-address part of a two-address
  // instruction is different from folding it in other places. It requires
  // replacing *both* tied registers with the memory location.
  if (isTwoAddr && NumOps >= 2 && OpNum < 2 && MI.getOperand(0).isReg() &&
      MI.getOperand(1).isReg() &&
      MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
    isTwoAddrFold = true;
  } else if (OpNum == 0) {
    if (MI.getOpcode() == X86::MOV32r0) {
      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
      if (NewMI)
        return NewMI;
    }
  }

    unsigned Opcode = I->DstOp;

    bool NarrowToMOV32rm = false;

      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      // Check if it is safe to fold the load. If the size of the object is
      // narrower than the load width, it is not.
      if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
        return nullptr;
      // If this is a 64-bit load from a 32-bit spill slot, we can do a
      // 32-bit load which is implicitly zero-extended, but not on subregs.
      if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
        return nullptr;
      Opcode = X86::MOV32rm;
      NarrowToMOV32rm = true;

    NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);

    if (NarrowToMOV32rm) {

    // If the instruction and target operand are commutable, commute the
    // instruction and try again.
    unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
    if (CommuteOpIdx2 == OpNum) {
      printFailMsgforFold(MI, OpNum);
      return nullptr;
    }

    // Folding failed again - undo the commute before returning.
    commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
  for (auto Op : Ops) {

  if (!RI.hasStackRealignment(MF))

  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    unsigned RCSize = 0;
    switch (MI.getOpcode()) {
    default:
      return nullptr;
    case X86::TEST8rr:
      NewOpc = X86::CMP8ri;
      RCSize = 1;
      break;
    case X86::TEST16rr:
      NewOpc = X86::CMP16ri;
      RCSize = 2;
      break;
    case X86::TEST32rr:
      NewOpc = X86::CMP32ri;
      RCSize = 4;
      break;
    case X86::TEST64rr:
      NewOpc = X86::CMP64ri32;
      RCSize = 8;
      break;
    }
    // Check if it is safe to fold the load. If the size of the object is
    // narrower than the load width, it is not.
    if (Size < RCSize)
      return nullptr;
    // Change to CMPXXri r, 0 first.
    MI.setDesc(get(NewOpc));
    MI.getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return nullptr;

  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt, Size, Alignment,
                               /*AllowCommute=*/true);
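// A scalar load (MOVSS/MOVSD/MOVSH) that feeds a full-width user can't be
// folded, since the fold would read more memory than the original load. The
// lists below enumerate the scalar users for which the fold remains legal.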
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
                                             const MachineInstr &UserMI,
                                             const MachineFunction &MF) {
  unsigned Opc = LoadMI.getOpcode();
  unsigned UserOpc = UserMI.getOpcode();
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC =
      MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
  unsigned RegSize = TRI.getRegSizeInBits(*RC);

  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
       Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
       Opc == X86::VMOVSSZrm_alt) &&
      RegSize > 32) {
    // These instructions only load 32 bits; we can't fold them if the
    // destination register is wider than 32 bits (4 bytes) and the user
    // instruction isn't scalar (SS).
    switch (UserOpc) {
    case X86::CVTSS2SDrr_Int:
    case X86::VCVTSS2SDrr_Int:
    case X86::VCVTSS2SDZrr_Int:
    case X86::VCVTSS2SDZrr_Intk:
    case X86::VCVTSS2SDZrr_Intkz:
    case X86::CVTSS2SIrr_Int:
    case X86::CVTSS2SI64rr_Int:
    case X86::VCVTSS2SIrr_Int:
    case X86::VCVTSS2SI64rr_Int:
    case X86::VCVTSS2SIZrr_Int:
    case X86::VCVTSS2SI64Zrr_Int:
    case X86::CVTTSS2SIrr_Int:
    case X86::CVTTSS2SI64rr_Int:
    case X86::VCVTTSS2SIrr_Int:
    case X86::VCVTTSS2SI64rr_Int:
    case X86::VCVTTSS2SIZrr_Int:
    case X86::VCVTTSS2SI64Zrr_Int:
    case X86::VCVTSS2USIZrr_Int:
    case X86::VCVTSS2USI64Zrr_Int:
    case X86::VCVTTSS2USIZrr_Int:
    case X86::VCVTTSS2USI64Zrr_Int:
    case X86::RCPSSr_Int:
    case X86::VRCPSSr_Int:
    case X86::RSQRTSSr_Int:
    case X86::VRSQRTSSr_Int:
    case X86::ROUNDSSr_Int:
    case X86::VROUNDSSr_Int:
    case X86::COMISSrr_Int:
    case X86::VCOMISSrr_Int:
    case X86::VCOMISSZrr_Int:
    case X86::UCOMISSrr_Int:
    case X86::VUCOMISSrr_Int:
    case X86::VUCOMISSZrr_Int:
    case X86::ADDSSrr_Int:
    case X86::VADDSSrr_Int:
    case X86::VADDSSZrr_Int:
    case X86::CMPSSrri_Int:
    case X86::VCMPSSrri_Int:
    case X86::VCMPSSZrri_Int:
    case X86::DIVSSrr_Int:
    case X86::VDIVSSrr_Int:
    case X86::VDIVSSZrr_Int:
    case X86::MAXSSrr_Int:
    case X86::VMAXSSrr_Int:
    case X86::VMAXSSZrr_Int:
    case X86::MINSSrr_Int:
    case X86::VMINSSrr_Int:
    case X86::VMINSSZrr_Int:
    case X86::MULSSrr_Int:
    case X86::VMULSSrr_Int:
    case X86::VMULSSZrr_Int:
    case X86::SQRTSSr_Int:
    case X86::VSQRTSSr_Int:
    case X86::VSQRTSSZr_Int:
    case X86::SUBSSrr_Int:
    case X86::VSUBSSrr_Int:
    case X86::VSUBSSZrr_Int:
    case X86::VADDSSZrr_Intk:
    case X86::VADDSSZrr_Intkz:
    case X86::VCMPSSZrri_Intk:
    case X86::VDIVSSZrr_Intk:
    case X86::VDIVSSZrr_Intkz:
    case X86::VMAXSSZrr_Intk:
    case X86::VMAXSSZrr_Intkz:
    case X86::VMINSSZrr_Intk:
    case X86::VMINSSZrr_Intkz:
    case X86::VMULSSZrr_Intk:
    case X86::VMULSSZrr_Intkz:
    case X86::VSQRTSSZr_Intk:
    case X86::VSQRTSSZr_Intkz:
    case X86::VSUBSSZrr_Intk:
    case X86::VSUBSSZrr_Intkz:
    case X86::VFMADDSS4rr_Int:
    case X86::VFNMADDSS4rr_Int:
    case X86::VFMSUBSS4rr_Int:
    case X86::VFNMSUBSS4rr_Int:
    case X86::VFMADD132SSr_Int:
    case X86::VFNMADD132SSr_Int:
    case X86::VFMADD213SSr_Int:
    case X86::VFNMADD213SSr_Int:
    case X86::VFMADD231SSr_Int:
    case X86::VFNMADD231SSr_Int:
    case X86::VFMSUB132SSr_Int:
    case X86::VFNMSUB132SSr_Int:
    case X86::VFMSUB213SSr_Int:
    case X86::VFNMSUB213SSr_Int:
    case X86::VFMSUB231SSr_Int:
    case X86::VFNMSUB231SSr_Int:
    case X86::VFMADD132SSZr_Int:
    case X86::VFNMADD132SSZr_Int:
    case X86::VFMADD213SSZr_Int:
    case X86::VFNMADD213SSZr_Int:
    case X86::VFMADD231SSZr_Int:
    case X86::VFNMADD231SSZr_Int:
    case X86::VFMSUB132SSZr_Int:
    case X86::VFNMSUB132SSZr_Int:
    case X86::VFMSUB213SSZr_Int:
    case X86::VFNMSUB213SSZr_Int:
    case X86::VFMSUB231SSZr_Int:
    case X86::VFNMSUB231SSZr_Int:
    case X86::VFMADD132SSZr_Intk:
    case X86::VFNMADD132SSZr_Intk:
    case X86::VFMADD213SSZr_Intk:
    case X86::VFNMADD213SSZr_Intk:
    case X86::VFMADD231SSZr_Intk:
    case X86::VFNMADD231SSZr_Intk:
    case X86::VFMSUB132SSZr_Intk:
    case X86::VFNMSUB132SSZr_Intk:
    case X86::VFMSUB213SSZr_Intk:
    case X86::VFNMSUB213SSZr_Intk:
    case X86::VFMSUB231SSZr_Intk:
    case X86::VFNMSUB231SSZr_Intk:
    case X86::VFMADD132SSZr_Intkz:
    case X86::VFNMADD132SSZr_Intkz:
    case X86::VFMADD213SSZr_Intkz:
    case X86::VFNMADD213SSZr_Intkz:
    case X86::VFMADD231SSZr_Intkz:
    case X86::VFNMADD231SSZr_Intkz:
    case X86::VFMSUB132SSZr_Intkz:
    case X86::VFNMSUB132SSZr_Intkz:
    case X86::VFMSUB213SSZr_Intkz:
    case X86::VFNMSUB213SSZr_Intkz:
    case X86::VFMSUB231SSZr_Intkz:
    case X86::VFNMSUB231SSZr_Intkz:
    case X86::VFIXUPIMMSSZrri:
    case X86::VFIXUPIMMSSZrrik:
    case X86::VFIXUPIMMSSZrrikz:
    case X86::VFPCLASSSSZrr:
    case X86::VFPCLASSSSZrrk:
    case X86::VGETEXPSSZr:
    case X86::VGETEXPSSZrk:
    case X86::VGETEXPSSZrkz:
    case X86::VGETMANTSSZrri:
    case X86::VGETMANTSSZrrik:
    case X86::VGETMANTSSZrrikz:
    case X86::VRANGESSZrri:
    case X86::VRANGESSZrrik:
    case X86::VRANGESSZrrikz:
    case X86::VRCP14SSZrr:
    case X86::VRCP14SSZrrk:
    case X86::VRCP14SSZrrkz:
    case X86::VRCP28SSZr:
    case X86::VRCP28SSZrk:
    case X86::VRCP28SSZrkz:
    case X86::VREDUCESSZrri:
    case X86::VREDUCESSZrrik:
    case X86::VREDUCESSZrrikz:
    case X86::VRNDSCALESSZr_Int:
    case X86::VRNDSCALESSZr_Intk:
    case X86::VRNDSCALESSZr_Intkz:
    case X86::VRSQRT14SSZrr:
    case X86::VRSQRT14SSZrrk:
    case X86::VRSQRT14SSZrrkz:
    case X86::VRSQRT28SSZr:
    case X86::VRSQRT28SSZrk:
    case X86::VRSQRT28SSZrkz:
    case X86::VSCALEFSSZrr:
    case X86::VSCALEFSSZrrk:
    case X86::VSCALEFSSZrrkz:
      return false;
    default:
      return true;
    }
  }
  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
       Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
       Opc == X86::VMOVSDZrm_alt) &&
      RegSize > 64) {
    // These instructions only load 64 bits; we can't fold them if the
    // destination register is wider than 64 bits (8 bytes) and the user
    // instruction isn't scalar (SD).
    switch (UserOpc) {
    case X86::CVTSD2SSrr_Int:
    case X86::VCVTSD2SSrr_Int:
    case X86::VCVTSD2SSZrr_Int:
    case X86::VCVTSD2SSZrr_Intk:
    case X86::VCVTSD2SSZrr_Intkz:
    case X86::CVTSD2SIrr_Int:
    case X86::CVTSD2SI64rr_Int:
    case X86::VCVTSD2SIrr_Int:
    case X86::VCVTSD2SI64rr_Int:
    case X86::VCVTSD2SIZrr_Int:
    case X86::VCVTSD2SI64Zrr_Int:
    case X86::CVTTSD2SIrr_Int:
    case X86::CVTTSD2SI64rr_Int:
    case X86::VCVTTSD2SIrr_Int:
    case X86::VCVTTSD2SI64rr_Int:
    case X86::VCVTTSD2SIZrr_Int:
    case X86::VCVTTSD2SI64Zrr_Int:
    case X86::VCVTSD2USIZrr_Int:
    case X86::VCVTSD2USI64Zrr_Int:
    case X86::VCVTTSD2USIZrr_Int:
    case X86::VCVTTSD2USI64Zrr_Int:
    case X86::ROUNDSDr_Int:
    case X86::VROUNDSDr_Int:
    case X86::COMISDrr_Int:
    case X86::VCOMISDrr_Int:
    case X86::VCOMISDZrr_Int:
    case X86::UCOMISDrr_Int:
    case X86::VUCOMISDrr_Int:
    case X86::VUCOMISDZrr_Int:
    case X86::ADDSDrr_Int:
    case X86::VADDSDrr_Int:
    case X86::VADDSDZrr_Int:
    case X86::CMPSDrri_Int:
    case X86::VCMPSDrri_Int:
    case X86::VCMPSDZrri_Int:
    case X86::DIVSDrr_Int:
    case X86::VDIVSDrr_Int:
    case X86::VDIVSDZrr_Int:
    case X86::MAXSDrr_Int:
    case X86::VMAXSDrr_Int:
    case X86::VMAXSDZrr_Int:
    case X86::MINSDrr_Int:
    case X86::VMINSDrr_Int:
    case X86::VMINSDZrr_Int:
    case X86::MULSDrr_Int:
    case X86::VMULSDrr_Int:
    case X86::VMULSDZrr_Int:
    case X86::SQRTSDr_Int:
    case X86::VSQRTSDr_Int:
    case X86::VSQRTSDZr_Int:
    case X86::SUBSDrr_Int:
    case X86::VSUBSDrr_Int:
    case X86::VSUBSDZrr_Int:
    case X86::VADDSDZrr_Intk:
    case X86::VADDSDZrr_Intkz:
    case X86::VCMPSDZrri_Intk:
    case X86::VDIVSDZrr_Intk:
    case X86::VDIVSDZrr_Intkz:
    case X86::VMAXSDZrr_Intk:
    case X86::VMAXSDZrr_Intkz:
    case X86::VMINSDZrr_Intk:
    case X86::VMINSDZrr_Intkz:
    case X86::VMULSDZrr_Intk:
    case X86::VMULSDZrr_Intkz:
    case X86::VSQRTSDZr_Intk:
    case X86::VSQRTSDZr_Intkz:
    case X86::VSUBSDZrr_Intk:
    case X86::VSUBSDZrr_Intkz:
    case X86::VFMADDSD4rr_Int:
    case X86::VFNMADDSD4rr_Int:
    case X86::VFMSUBSD4rr_Int:
    case X86::VFNMSUBSD4rr_Int:
    case X86::VFMADD132SDr_Int:
    case X86::VFNMADD132SDr_Int:
    case X86::VFMADD213SDr_Int:
    case X86::VFNMADD213SDr_Int:
    case X86::VFMADD231SDr_Int:
    case X86::VFNMADD231SDr_Int:
    case X86::VFMSUB132SDr_Int:
    case X86::VFNMSUB132SDr_Int:
    case X86::VFMSUB213SDr_Int:
    case X86::VFNMSUB213SDr_Int:
    case X86::VFMSUB231SDr_Int:
    case X86::VFNMSUB231SDr_Int:
    case X86::VFMADD132SDZr_Int:
    case X86::VFNMADD132SDZr_Int:
    case X86::VFMADD213SDZr_Int:
    case X86::VFNMADD213SDZr_Int:
    case X86::VFMADD231SDZr_Int:
    case X86::VFNMADD231SDZr_Int:
    case X86::VFMSUB132SDZr_Int:
    case X86::VFNMSUB132SDZr_Int:
    case X86::VFMSUB213SDZr_Int:
    case X86::VFNMSUB213SDZr_Int:
    case X86::VFMSUB231SDZr_Int:
    case X86::VFNMSUB231SDZr_Int:
    case X86::VFMADD132SDZr_Intk:
    case X86::VFNMADD132SDZr_Intk:
    case X86::VFMADD213SDZr_Intk:
    case X86::VFNMADD213SDZr_Intk:
    case X86::VFMADD231SDZr_Intk:
    case X86::VFNMADD231SDZr_Intk:
    case X86::VFMSUB132SDZr_Intk:
    case X86::VFNMSUB132SDZr_Intk:
    case X86::VFMSUB213SDZr_Intk:
    case X86::VFNMSUB213SDZr_Intk:
    case X86::VFMSUB231SDZr_Intk:
    case X86::VFNMSUB231SDZr_Intk:
    case X86::VFMADD132SDZr_Intkz:
    case X86::VFNMADD132SDZr_Intkz:
    case X86::VFMADD213SDZr_Intkz:
    case X86::VFNMADD213SDZr_Intkz:
    case X86::VFMADD231SDZr_Intkz:
    case X86::VFNMADD231SDZr_Intkz:
    case X86::VFMSUB132SDZr_Intkz:
    case X86::VFNMSUB132SDZr_Intkz:
    case X86::VFMSUB213SDZr_Intkz:
    case X86::VFNMSUB213SDZr_Intkz:
    case X86::VFMSUB231SDZr_Intkz:
    case X86::VFNMSUB231SDZr_Intkz:
    case X86::VFIXUPIMMSDZrri:
    case X86::VFIXUPIMMSDZrrik:
    case X86::VFIXUPIMMSDZrrikz:
    case X86::VFPCLASSSDZrr:
    case X86::VFPCLASSSDZrrk:
    case X86::VGETEXPSDZr:
    case X86::VGETEXPSDZrk:
    case X86::VGETEXPSDZrkz:
    case X86::VGETMANTSDZrri:
    case X86::VGETMANTSDZrrik:
    case X86::VGETMANTSDZrrikz:
    case X86::VRANGESDZrri:
    case X86::VRANGESDZrrik:
    case X86::VRANGESDZrrikz:
    case X86::VRCP14SDZrr:
    case X86::VRCP14SDZrrk:
    case X86::VRCP14SDZrrkz:
    case X86::VRCP28SDZr:
    case X86::VRCP28SDZrk:
    case X86::VRCP28SDZrkz:
    case X86::VREDUCESDZrri:
    case X86::VREDUCESDZrrik:
    case X86::VREDUCESDZrrikz:
    case X86::VRNDSCALESDZr_Int:
    case X86::VRNDSCALESDZr_Intk:
    case X86::VRNDSCALESDZr_Intkz:
    case X86::VRSQRT14SDZrr:
    case X86::VRSQRT14SDZrrk:
    case X86::VRSQRT14SDZrrkz:
    case X86::VRSQRT28SDZr:
    case X86::VRSQRT28SDZrk:
    case X86::VRSQRT28SDZrkz:
    case X86::VSCALEFSDZrr:
    case X86::VSCALEFSDZrrk:
    case X86::VSCALEFSDZrrkz:
      return false;
    default:
      return true;
    }
  }
  if ((Opc == X86::VMOVSHZrm || Opc == X86::VMOVSHZrm_alt) && RegSize > 16) {
    // These instructions only load 16 bits; we can't fold them if the
    // destination register is wider than 16 bits (2 bytes) and the user
    // instruction isn't scalar (SH).
    switch (UserOpc) {
    case X86::VADDSHZrr_Int:
    case X86::VCMPSHZrri_Int:
    case X86::VDIVSHZrr_Int:
    case X86::VMAXSHZrr_Int:
    case X86::VMINSHZrr_Int:
    case X86::VMULSHZrr_Int:
    case X86::VSUBSHZrr_Int:
    case X86::VADDSHZrr_Intk:
    case X86::VADDSHZrr_Intkz:
    case X86::VCMPSHZrri_Intk:
    case X86::VDIVSHZrr_Intk:
    case X86::VDIVSHZrr_Intkz:
    case X86::VMAXSHZrr_Intk:
    case X86::VMAXSHZrr_Intkz:
    case X86::VMINSHZrr_Intk:
    case X86::VMINSHZrr_Intkz:
    case X86::VMULSHZrr_Intk:
    case X86::VMULSHZrr_Intkz:
    case X86::VSUBSHZrr_Intk:
    case X86::VSUBSHZrr_Intkz:
    case X86::VFMADD132SHZr_Int:
    case X86::VFNMADD132SHZr_Int:
    case X86::VFMADD213SHZr_Int:
    case X86::VFNMADD213SHZr_Int:
    case X86::VFMADD231SHZr_Int:
    case X86::VFNMADD231SHZr_Int:
    case X86::VFMSUB132SHZr_Int:
    case X86::VFNMSUB132SHZr_Int:
    case X86::VFMSUB213SHZr_Int:
    case X86::VFNMSUB213SHZr_Int:
    case X86::VFMSUB231SHZr_Int:
    case X86::VFNMSUB231SHZr_Int:
    case X86::VFMADD132SHZr_Intk:
    case X86::VFNMADD132SHZr_Intk:
    case X86::VFMADD213SHZr_Intk:
    case X86::VFNMADD213SHZr_Intk:
    case X86::VFMADD231SHZr_Intk:
    case X86::VFNMADD231SHZr_Intk:
    case X86::VFMSUB132SHZr_Intk:
    case X86::VFNMSUB132SHZr_Intk:
    case X86::VFMSUB213SHZr_Intk:
    case X86::VFNMSUB213SHZr_Intk:
    case X86::VFMSUB231SHZr_Intk:
    case X86::VFNMSUB231SHZr_Intk:
    case X86::VFMADD132SHZr_Intkz:
    case X86::VFNMADD132SHZr_Intkz:
    case X86::VFMADD213SHZr_Intkz:
    case X86::VFNMADD213SHZr_Intkz:
    case X86::VFMADD231SHZr_Intkz:
    case X86::VFNMADD231SHZr_Intkz:
    case X86::VFMSUB132SHZr_Intkz:
    case X86::VFNMSUB132SHZr_Intkz:
    case X86::VFMSUB213SHZr_Intkz:
    case X86::VFNMSUB213SHZr_Intkz:
    case X86::VFMSUB231SHZr_Intkz:
    case X86::VFNMSUB231SHZr_Intkz:
      return false;
    default:
      return true;
    }
  }

  return false;
}
  // TODO: Support the case where LoadMI loads a wide register, but MI
  // only uses a subreg.
  for (auto Op : Ops) {
    if (MI.getOperand(Op).getSubReg())
      return nullptr;
  }
  // Determine the alignment of the load.
  Align Alignment;
  switch (LoadMI.getOpcode()) {
  case X86::AVX512_512_SET0:
  case X86::AVX512_512_SETALLONES:
    Alignment = Align(64);
    break;
  case X86::AVX2_SETALLONES:
  case X86::AVX1_SETALLONES:
  case X86::AVX_SET0:
  case X86::AVX512_256_SET0:
    Alignment = Align(32);
    break;
  case X86::V_SET0:
  case X86::V_SETALLONES:
  case X86::AVX512_128_SET0:
  case X86::FsFLD0F128:
  case X86::AVX512_FsFLD0F128:
    Alignment = Align(16);
    break;
  case X86::FsFLD0SD:
  case X86::AVX512_FsFLD0SD:
    Alignment = Align(8);
    break;
  case X86::FsFLD0SS:
  case X86::AVX512_FsFLD0SS:
    Alignment = Align(4);
    break;
  case X86::FsFLD0SH:
  case X86::AVX512_FsFLD0SH:
    Alignment = Align(2);
    break;
  default:
    return nullptr;
  }
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    switch (MI.getOpcode()) {
    default:
      return nullptr;
    case X86::TEST8rr:
      NewOpc = X86::CMP8ri;
      break;
    case X86::TEST16rr:
      NewOpc = X86::CMP16ri;
      break;
    case X86::TEST32rr:
      NewOpc = X86::CMP32ri;
      break;
    case X86::TEST64rr:
      NewOpc = X86::CMP64ri32;
      break;
    }
    // Change to CMPXXri r, 0 first.
    MI.setDesc(get(NewOpc));
    MI.getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return nullptr;
  case X86::V_SET0:
  case X86::V_SETALLONES:
  case X86::AVX2_SETALLONES:
  case X86::AVX1_SETALLONES:
  case X86::AVX_SET0:
  case X86::AVX512_128_SET0:
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0:
  case X86::AVX512_512_SETALLONES:
  case X86::FsFLD0SH:
  case X86::AVX512_FsFLD0SH:
  case X86::FsFLD0SD:
  case X86::AVX512_FsFLD0SD:
  case X86::FsFLD0SS:
  case X86::AVX512_FsFLD0SS:
  case X86::FsFLD0F128:
  case X86::AVX512_FsFLD0F128: {
    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
    // Create a constant-pool entry and operands to load from it.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    // Since we're using Small or Kernel code model, we can always use
    // RIP-relative addressing for a smaller encoding.
    if (Subtarget.is64Bit()) {
      PICBase = X86::RIP;
    }

    // Create a constant-pool entry of the appropriate type and value.
    bool IsAllOnes = false;
    switch (MI.getOpcode()) {
    default:
    case X86::FsFLD0SS:
    case X86::AVX512_FsFLD0SS:

    case X86::FsFLD0SD:
    case X86::AVX512_FsFLD0SD:

    case X86::FsFLD0F128:
    case X86::AVX512_FsFLD0F128:

    case X86::FsFLD0SH:
    case X86::AVX512_FsFLD0SH:

    case X86::AVX512_512_SETALLONES:
      IsAllOnes = true;
      [[fallthrough]];
    case X86::AVX512_512_SET0:

    case X86::AVX1_SETALLONES:
    case X86::AVX2_SETALLONES:
      IsAllOnes = true;
      [[fallthrough]];
    case X86::AVX_SET0:
    case X86::AVX512_256_SET0:

    case X86::V_SETALLONES:
      IsAllOnes = true;
      [[fallthrough]];
    case X86::V_SET0:
    case X86::AVX512_128_SET0:
  case X86::VPBROADCASTBZ128rm:
  case X86::VPBROADCASTBZ256rm:
  case X86::VPBROADCASTBZrm:
  case X86::VBROADCASTF32X2Z256rm:
  case X86::VBROADCASTF32X2Zrm:
  case X86::VBROADCASTI32X2Z128rm:
  case X86::VBROADCASTI32X2Z256rm:
  case X86::VBROADCASTI32X2Zrm:
    // No instructions currently fuse with 8bits or 32bits x 2.
    return nullptr;

#define FOLD_BROADCAST(SIZE)                                                   \
  MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands,          \
             LoadMI.operands_begin() + NumOps);                                \
  return foldMemoryBroadcast(MF, MI, Ops[0], MOs, InsertPt, SIZE,              \
                             /*AllowCommute=*/true)
  case X86::VPBROADCASTWZ128rm:
  case X86::VPBROADCASTWZ256rm:
  case X86::VPBROADCASTWZrm:
    FOLD_BROADCAST(16);
  case X86::VPBROADCASTDZ128rm:
  case X86::VPBROADCASTDZ256rm:
  case X86::VPBROADCASTDZrm:
  case X86::VBROADCASTSSZ128rm:
  case X86::VBROADCASTSSZ256rm:
  case X86::VBROADCASTSSZrm:
    FOLD_BROADCAST(32);
  case X86::VPBROADCASTQZ128rm:
  case X86::VPBROADCASTQZ256rm:
  case X86::VPBROADCASTQZrm:
  case X86::VBROADCASTSDZ256rm:
  case X86::VBROADCASTSDZrm:
    FOLD_BROADCAST(64);

    return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
                                 /*Size=*/0, Alignment, /*AllowCommute=*/true);
MachineInstr *X86InstrInfo::foldMemoryBroadcast(
    MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
    ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
    unsigned BitsSize, bool AllowCommute) const {

  if (auto *I = lookupBroadcastFoldTable(MI.getOpcode(), OpNum))
    return matchBroadcastSize(*I, BitsSize)
               ? FuseInst(MF, I->DstOp, OpNum, MOs, InsertPt, MI, *this)
               : nullptr;

  if (AllowCommute) {
    // If the instruction and target operand are commutable, commute the
    // instruction and try again.
    unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
    if (CommuteOpIdx2 == OpNum) {
      printFailMsgforFold(MI, OpNum);
      return nullptr;
    }
    MachineInstr *NewMI =
        foldMemoryBroadcast(MF, MI, CommuteOpIdx2, MOs, InsertPt, BitsSize,
                            /*AllowCommute=*/false);
    if (NewMI)
      return NewMI;
    // Folding failed again - undo the commute before returning.
    commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
  }

  printFailMsgforFold(MI, OpNum);
  return nullptr;
}
static SmallVector<MachineMemOperand *, 2>
extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
  SmallVector<MachineMemOperand *, 2> LoadMMOs;
  for (MachineMemOperand *MMO : MMOs) {
    if (!MMO->isLoad())
      continue;
    if (!MMO->isStore()) {
      // Reuse the MMO.
      LoadMMOs.push_back(MMO);
    } else {
      // Clone the MMO and unset the store flag.
      LoadMMOs.push_back(MF.getMachineMemOperand(
          MMO, MMO->getFlags() & ~MachineMemOperand::MOStore));
    }
  }
  return LoadMMOs;
}

static SmallVector<MachineMemOperand *, 2>
extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
  SmallVector<MachineMemOperand *, 2> StoreMMOs;
  for (MachineMemOperand *MMO : MMOs) {
    if (!MMO->isStore())
      continue;
    if (!MMO->isLoad()) {
      // Reuse the MMO.
      StoreMMOs.push_back(MMO);
    } else {
      // Clone the MMO and unset the load flag.
      StoreMMOs.push_back(MF.getMachineMemOperand(
          MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad));
    }
  }
  return StoreMMOs;
}
  assert((SpillSize == 64 || STI.hasVLX()) &&
         "Can't broadcast less than 64 bytes without AVX512VL!");

#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64)                            \
  case TYPE:                                                                   \
    switch (SpillSize) {                                                       \
    default:                                                                   \
      llvm_unreachable("Unknown spill size");                                  \
  unsigned Opc = I->DstOp;
  unsigned Index = I->Flags & TB_INDEX_MASK;
  bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
  bool FoldedStore = I->Flags & TB_FOLDED_STORE;
  if (UnfoldLoad && !FoldedLoad)
    return false;
  UnfoldLoad &= FoldedLoad;
  if (UnfoldStore && !FoldedStore)
    return false;
  UnfoldStore &= FoldedStore;

  // TODO: Check if 32-byte or greater accesses are slow too?
  if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
      Subtarget.isUnalignedMem16Slow())
    // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
    // conservatively assume the address is unaligned. That is bad for
    // performance.
    return false;

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI.getOperand(i);
    if (i >= Index && i < Index + X86::AddrNumOperands)
      AddrOps.push_back(Op);
    else if (Op.isReg() && Op.isImplicit())
      ImpOps.push_back(Op);
    else if (i < Index)
      BeforeOps.push_back(Op);
    else if (i > Index)
      AfterOps.push_back(Op);
  }

  // Emit the load instruction.
    unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
    bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;

  // Change CMPxxri r, 0 back to TESTxxrr r, r.
  switch (Opc) {
  default:
    break;
  case X86::CMP64ri32:
  case X86::CMP32ri:
  case X86::CMP16ri:
  case X86::CMP8ri: {
    if (DataMI->getOperand(1).getImm() == 0) {
      unsigned NewOpc;
      switch (Opc) {
      default:
        llvm_unreachable("Unreachable!");
      case X86::CMP64ri32:
        NewOpc = X86::TEST64rr;
        break;
      case X86::CMP32ri:
        NewOpc = X86::TEST32rr;
        break;
      case X86::CMP16ri:
        NewOpc = X86::TEST16rr;
        break;
      case X86::CMP8ri:
        NewOpc = X86::TEST8rr;
        break;
      }

  // Emit the store instruction.
    unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
    bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
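// The SelectionDAG twin of unfoldMemoryOperand: split a load-op(-store)
// machine node back into separate load, compute, and store nodes.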
8408 if (!
N->isMachineOpcode())
8414 unsigned Opc = I->DstOp;
8422 unsigned NumDefs = MCID.NumDefs;
8423 std::vector<SDValue> AddrOps;
8424 std::vector<SDValue> BeforeOps;
8425 std::vector<SDValue> AfterOps;
8427 unsigned NumOps = N->getNumOperands();
8428 for (unsigned i = 0; i != NumOps - 1; ++i) {
8431 AddrOps.push_back(Op);
8432 else if (i < Index - NumDefs)
8433 BeforeOps.push_back(Op);
8434 else if (i > Index - NumDefs)
8435 AfterOps.push_back(Op);
8437 SDValue Chain = N->getOperand(NumOps - 1);
8438 AddrOps.push_back(Chain);
8443 EVT VT = *TRI.legalclasstypes_begin(*RC);
8445 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8446 Subtarget.isUnalignedMem16Slow())
8456 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8457 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8469 std::vector<EVT> VTs;
8473 VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
8475 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
8476 EVT VT = N->getValueType(i);
8477 if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
8481 BeforeOps.push_back(SDValue(Load, 0));
8487 case X86::CMP64ri32:
8495 case X86::CMP64ri32:
8496 Opc = X86::TEST64rr;
8499 Opc = X86::TEST32rr;
8502 Opc = X86::TEST16rr;
8508 BeforeOps[1] = BeforeOps[0];
8517 AddrOps.push_back(SDValue(NewNode, 0));
8518 AddrOps.push_back(Chain);
8520 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8521 Subtarget.isUnalignedMem16Slow())
8526 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8527 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8530 dl, MVT::Other, AddrOps);
8543 unsigned *LoadRegIndex) const {
8549 if (UnfoldLoad && !FoldedLoad)
8551 if (UnfoldStore && !FoldedStore)
8560 int64_t &Offset2) const {
8564 auto IsLoadOpcode = [&](unsigned Opcode) {
8576 case X86::MOVSSrm_alt:
8578 case X86::MOVSDrm_alt:
8579 case X86::MMX_MOVD64rm:
8580 case X86::MMX_MOVQ64rm:
8589 case X86::VMOVSSrm_alt:
8591 case X86::VMOVSDrm_alt:
8592 case X86::VMOVAPSrm:
8593 case X86::VMOVUPSrm:
8594 case X86::VMOVAPDrm:
8595 case X86::VMOVUPDrm:
8596 case X86::VMOVDQArm:
8597 case X86::VMOVDQUrm:
8598 case X86::VMOVAPSYrm:
8599 case X86::VMOVUPSYrm:
8600 case X86::VMOVAPDYrm:
8601 case X86::VMOVUPDYrm:
8602 case X86::VMOVDQAYrm:
8603 case X86::VMOVDQUYrm:
8605 case X86::VMOVSSZrm:
8606 case X86::VMOVSSZrm_alt:
8607 case X86::VMOVSDZrm:
8608 case X86::VMOVSDZrm_alt:
8609 case X86::VMOVAPSZ128rm:
8610 case X86::VMOVUPSZ128rm:
8611 case X86::VMOVAPSZ128rm_NOVLX:
8612 case X86::VMOVUPSZ128rm_NOVLX:
8613 case X86::VMOVAPDZ128rm:
8614 case X86::VMOVUPDZ128rm:
8615 case X86::VMOVDQU8Z128rm:
8616 case X86::VMOVDQU16Z128rm:
8617 case X86::VMOVDQA32Z128rm:
8618 case X86::VMOVDQU32Z128rm:
8619 case X86::VMOVDQA64Z128rm:
8620 case X86::VMOVDQU64Z128rm:
8621 case X86::VMOVAPSZ256rm:
8622 case X86::VMOVUPSZ256rm:
8623 case X86::VMOVAPSZ256rm_NOVLX:
8624 case X86::VMOVUPSZ256rm_NOVLX:
8625 case X86::VMOVAPDZ256rm:
8626 case X86::VMOVUPDZ256rm:
8627 case X86::VMOVDQU8Z256rm:
8628 case X86::VMOVDQU16Z256rm:
8629 case X86::VMOVDQA32Z256rm:
8630 case X86::VMOVDQU32Z256rm:
8631 case X86::VMOVDQA64Z256rm:
8632 case X86::VMOVDQU64Z256rm:
8633 case X86::VMOVAPSZrm:
8634 case X86::VMOVUPSZrm:
8635 case X86::VMOVAPDZrm:
8636 case X86::VMOVUPDZrm:
8637 case X86::VMOVDQU8Zrm:
8638 case X86::VMOVDQU16Zrm:
8639 case X86::VMOVDQA32Zrm:
8640 case X86::VMOVDQU32Zrm:
8641 case X86::VMOVDQA64Zrm:
8642 case X86::VMOVDQU64Zrm:
8644 case X86::KMOVBkm_EVEX:
8646 case X86::KMOVWkm_EVEX:
8648 case X86::KMOVDkm_EVEX:
8650 case X86::KMOVQkm_EVEX:
8660 auto HasSameOp = [&](int I) {
8676 if (!Disp1 || !Disp2)
8679 Offset1 = Disp1->getSExtValue();
8680 Offset2 = Disp2->getSExtValue();
8685 int64_t Offset1, int64_t Offset2,
8686 unsigned NumLoads) const {
8687 assert(Offset2 > Offset1);
8688 if ((Offset2 - Offset1) / 8 > 64)
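// That is, two loads whose displacements differ by more than 64 * 8 = 512
// bytes are considered too far apart to benefit from being scheduled near
// each other.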
8702 case X86::MMX_MOVD64rm:
8703 case X86::MMX_MOVQ64rm:
8712 if (Subtarget.is64Bit()) {
8715 } else if (NumLoads) {
8738 unsigned Opcode = MI.getOpcode();
8739 if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
8740 Opcode == X86::PLDTILECFGV)
8748 assert(Cond.size() == 1 && "Invalid X86 branch condition!");
8758 return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
8759 RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
8760 RC == &X86::RFP80RegClass);
8772 if (GlobalBaseReg != 0)
8773 return GlobalBaseReg;
8779 Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass);
8781 return GlobalBaseReg;
8789 for (const uint16_t(&Row)[3] : Table)
8790 if (Row[domain - 1] == opcode)
8798 for (const uint16_t(&Row)[4] : Table)
8799 if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode))
8806 unsigned NewWidth, unsigned *pNewMask = nullptr) {
8807 assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
8808 "Illegal blend mask scale");
8809 unsigned NewMask = 0;
8811 if ((OldWidth % NewWidth) == 0) {
8812 unsigned Scale = OldWidth / NewWidth;
8813 unsigned SubMask = (1u << Scale) - 1;
8814 for (unsigned i = 0; i != NewWidth; ++i) {
8815 unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
8817 NewMask |= (1u << i);
8818 else if (Sub != 0x0)
8822 unsigned Scale = NewWidth / OldWidth;
8823 unsigned SubMask = (1u << Scale) - 1;
8824 for (unsigned i = 0; i != OldWidth; ++i) {
8825 if (OldMask & (1 << i)) {
8826 NewMask |= (SubMask << (i * Scale));
8832 *pNewMask = NewMask;
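// Worked example for the scaling above: widening OldMask = 0b0101 from
// OldWidth = 4 to NewWidth = 8 uses Scale = 2 and SubMask = 0b11, producing
// NewMask = 0b00110011. Narrowing only succeeds when each Scale-sized group
// of old bits is uniformly all-ones or all-zeros; a mixed group has no exact
// narrow equivalent and the function reports failure.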
8837 unsigned Opcode = MI.getOpcode();
8838 unsigned NumOperands = MI.getDesc().getNumOperands();
8840 auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
8842 if (MI.getOperand(NumOperands - 1).isImm()) {
8843 unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
8845 validDomains |= 0x2;
8847 validDomains |= 0x4;
8848 if (!Is256 || Subtarget.hasAVX2())
8849 validDomains |= 0x8;
8851 return validDomains;
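// The bitmask returned here encodes the domains this blend can be rewritten
// into: 0x2 packed-single, 0x4 packed-double, 0x8 packed-integer, with the
// integer form additionally gated on AVX2 for 256-bit vectors (VPBLENDDY).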
8855 case X86::BLENDPDrmi:
8856 case X86::BLENDPDrri:
8857 case X86::VBLENDPDrmi:
8858 case X86::VBLENDPDrri:
8859 return GetBlendDomains(2, false);
8860 case X86::VBLENDPDYrmi:
8861 case X86::VBLENDPDYrri:
8862 return GetBlendDomains(4, true);
8863 case X86::BLENDPSrmi:
8864 case X86::BLENDPSrri:
8865 case X86::VBLENDPSrmi:
8866 case X86::VBLENDPSrri:
8867 case X86::VPBLENDDrmi:
8868 case X86::VPBLENDDrri:
8869 return GetBlendDomains(4, false);
8870 case X86::VBLENDPSYrmi:
8871 case X86::VBLENDPSYrri:
8872 case X86::VPBLENDDYrmi:
8873 case X86::VPBLENDDYrri:
8874 return GetBlendDomains(8, true);
8875 case X86::PBLENDWrmi:
8876 case X86::PBLENDWrri:
8877 case X86::VPBLENDWrmi:
8878 case X86::VPBLENDWrri:
8880 case X86::VPBLENDWYrmi:
8881 case X86::VPBLENDWYrri:
8882 return GetBlendDomains(8, false);
8883 case X86::VPANDDZ128rr:
8884 case X86::VPANDDZ128rm:
8885 case X86::VPANDDZ256rr:
8886 case X86::VPANDDZ256rm:
8887 case X86::VPANDQZ128rr:
8888 case X86::VPANDQZ128rm:
8889 case X86::VPANDQZ256rr:
8890 case X86::VPANDQZ256rm:
8891 case X86::VPANDNDZ128rr:
8892 case X86::VPANDNDZ128rm:
8893 case X86::VPANDNDZ256rr:
8894 case X86::VPANDNDZ256rm:
8895 case X86::VPANDNQZ128rr:
8896 case X86::VPANDNQZ128rm:
8897 case X86::VPANDNQZ256rr:
8898 case X86::VPANDNQZ256rm:
8899 case X86::VPORDZ128rr:
8900 case X86::VPORDZ128rm:
8901 case X86::VPORDZ256rr:
8902 case X86::VPORDZ256rm:
8903 case X86::VPORQZ128rr:
8904 case X86::VPORQZ128rm:
8905 case X86::VPORQZ256rr:
8906 case X86::VPORQZ256rm:
8907 case X86::VPXORDZ128rr:
8908 case X86::VPXORDZ128rm:
8909 case X86::VPXORDZ256rr:
8910 case X86::VPXORDZ256rm:
8911 case X86::VPXORQZ128rr:
8912 case X86::VPXORQZ128rm:
8913 case X86::VPXORQZ256rr:
8914 case X86::VPXORQZ256rm:
8917 if (Subtarget.hasDQI())
8920 if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
8922 if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
8925 if (NumOperands == 3 &&
8926     RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
8931 case X86::MOVHLPSrr:
8938 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
8939     MI.getOperand(0).getSubReg() == 0 &&
8940     MI.getOperand(1).getSubReg() == 0 && MI.getOperand(2).getSubReg() == 0)
8943 case X86::SHUFPDrri:
8949#include "X86ReplaceableInstrs.def"
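// The .def file above instantiates the Replaceable* tables consulted below:
// each row spells the same operation in the single, double, and integer
// domains (the 4-wide AVX-512 tables add a fourth column), and
// lookup()/lookupAVX512() search a row for the current opcode.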
8955 assert(dom && "Not an SSE instruction");
8957 unsigned Opcode = MI.getOpcode();
8958 unsigned NumOperands = MI.getDesc().getNumOperands();
8960 auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
8961 if (MI.getOperand(NumOperands - 1).isImm()) {
8962 unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
8963 Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
8964 unsigned NewImm = Imm;
8966 const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
8968 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
8972 } else if (Domain == 2) {
8974 } else if (Domain == 3) {
8977 if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
8978 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
8982 assert(!Is256 && "128-bit vector expected");
8987 assert(table && table[Domain - 1] && "Unknown domain op");
8989 MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
8995 case X86::BLENDPDrmi:
8996 case X86::BLENDPDrri:
8997 case X86::VBLENDPDrmi:
8998 case X86::VBLENDPDrri:
8999 return SetBlendDomain(2, false);
9000 case X86::VBLENDPDYrmi:
9001 case X86::VBLENDPDYrri:
9002 return SetBlendDomain(4, true);
9003 case X86::BLENDPSrmi:
9004 case X86::BLENDPSrri:
9005 case X86::VBLENDPSrmi:
9006 case X86::VBLENDPSrri:
9007 case X86::VPBLENDDrmi:
9008 case X86::VPBLENDDrri:
9009 return SetBlendDomain(4, false);
9010 case X86::VBLENDPSYrmi:
9011 case X86::VBLENDPSYrri:
9012 case X86::VPBLENDDYrmi:
9013 case X86::VPBLENDDYrri:
9014 return SetBlendDomain(8, true);
9015 case X86::PBLENDWrmi:
9016 case X86::PBLENDWrri:
9017 case X86::VPBLENDWrmi:
9018 case X86::VPBLENDWrri:
9019 return SetBlendDomain(8, false);
9020 case X86::VPBLENDWYrmi:
9021 case X86::VPBLENDWYrri:
9022 return SetBlendDomain(16, true);
9023 case X86::VPANDDZ128rr:
9024 case X86::VPANDDZ128rm:
9025 case X86::VPANDDZ256rr:
9026 case X86::VPANDDZ256rm:
9027 case X86::VPANDQZ128rr:
9028 case X86::VPANDQZ128rm:
9029 case X86::VPANDQZ256rr:
9030 case X86::VPANDQZ256rm:
9031 case X86::VPANDNDZ128rr:
9032 case X86::VPANDNDZ128rm:
9033 case X86::VPANDNDZ256rr:
9034 case X86::VPANDNDZ256rm:
9035 case X86::VPANDNQZ128rr:
9036 case X86::VPANDNQZ128rm:
9037 case X86::VPANDNQZ256rr:
9038 case X86::VPANDNQZ256rm:
9039 case X86::VPORDZ128rr:
9040 case X86::VPORDZ128rm:
9041 case X86::VPORDZ256rr:
9042 case X86::VPORDZ256rm:
9043 case X86::VPORQZ128rr:
9044 case X86::VPORQZ128rm:
9045 case X86::VPORQZ256rr:
9046 case X86::VPORQZ256rm:
9047 case X86::VPXORDZ128rr:
9048 case X86::VPXORDZ128rm:
9049 case X86::VPXORDZ256rr:
9050 case X86::VPXORDZ256rm:
9051 case X86::VPXORQZ128rr:
9052 case X86::VPXORQZ128rm:
9053 case X86::VPXORQZ256rr:
9054 case X86::VPXORQZ256rm: {
9056 if (Subtarget.hasDQI())
9060 lookupAVX512(MI.getOpcode(), dom, ReplaceableCustomAVX512LogicInstrs);
9061 assert(table && "Instruction not found in table?");
9064 if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9069 case X86::UNPCKHPDrr:
9070 case X86::MOVHLPSrr:
9073 MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
9074     MI.getOperand(0).getSubReg() == 0 &&
9075     MI.getOperand(1).getSubReg() == 0 &&
9076     MI.getOperand(2).getSubReg() == 0) {
9077 commuteInstruction(MI, /*NewMI=*/false);
9081 if (Opcode == X86::MOVHLPSrr)
9084 case X86::SHUFPDrri: {
9086 unsigned Imm = MI.getOperand(3).getImm();
9087 unsigned NewImm = 0x44;
9092 MI.getOperand(3).setImm(NewImm);
9093 MI.setDesc(get(X86::SHUFPSrri));
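// Rationale for the rewrite above: SHUFPDrri picks one double from each
// source, so the SHUFPS replacement must move 64-bit halves, i.e. pairs of
// float lanes. The base immediate 0x44 (lane selectors 0,1,0,1) copies the
// low double from each source; bits of the old SHUFPD immediate are then
// folded in to select high halves where required.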
9101std::pair<uint16_t, uint16_t>
9104 unsigned opcode = MI.getOpcode();
9110 return std::make_pair(domain, validDomains);
9112 if (lookup(opcode, domain, ReplaceableInstrs)) {
9114 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
9115 validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
9116 } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
9118 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
9122 return std::make_pair(0, 0);
9124 } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
9126 } else if (Subtarget.hasDQI() &&
9127            lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
9129 } else if (Subtarget.hasDQI()) {
9131 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQMasked)) {
9132 if (domain == 1 || (domain == 3 && table[3] == opcode))
9139 return std::make_pair(domain, validDomains);
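// The returned pair is (current domain, bitmask of domains the instruction
// can be moved to); (0, 0) means the domain is fixed.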
9145 assert(dom && "Not an SSE instruction");
9154 "256-bit vector operations only available in AVX2");
9155 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
9158 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
9160 "Can only select PackedSingle or PackedDouble");
9164 "256-bit insert/extract only available in AVX2");
9165 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
9169 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
9171 if (table && Domain == 3 && table[3] == MI.getOpcode())
9175 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9176 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
9179 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9183 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9184 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
9185 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9188 assert(table && "Cannot change domain");
9214 case X86::DIVSDrm_Int:
9216 case X86::DIVSDrr_Int:
9218 case X86::DIVSSrm_Int:
9220 case X86::DIVSSrr_Int:
9226 case X86::SQRTSDm_Int:
9228 case X86::SQRTSDr_Int:
9230 case X86::SQRTSSm_Int:
9232 case X86::SQRTSSr_Int:
9236 case X86::VDIVPDYrm:
9237 case X86::VDIVPDYrr:
9240 case X86::VDIVPSYrm:
9241 case X86::VDIVPSYrr:
9243 case X86::VDIVSDrm_Int:
9245 case X86::VDIVSDrr_Int:
9247 case X86::VDIVSSrm_Int:
9249 case X86::VDIVSSrr_Int:
9252 case X86::VSQRTPDYm:
9253 case X86::VSQRTPDYr:
9256 case X86::VSQRTPSYm:
9257 case X86::VSQRTPSYr:
9259 case X86::VSQRTSDm_Int:
9261 case X86::VSQRTSDr_Int:
9263 case X86::VSQRTSSm_Int:
9265 case X86::VSQRTSSr_Int:
9267 case X86::VDIVPDZ128rm:
9268 case X86::VDIVPDZ128rmb:
9269 case X86::VDIVPDZ128rmbk:
9270 case X86::VDIVPDZ128rmbkz:
9271 case X86::VDIVPDZ128rmk:
9272 case X86::VDIVPDZ128rmkz:
9273 case X86::VDIVPDZ128rr:
9274 case X86::VDIVPDZ128rrk:
9275 case X86::VDIVPDZ128rrkz:
9276 case X86::VDIVPDZ256rm:
9277 case X86::VDIVPDZ256rmb:
9278 case X86::VDIVPDZ256rmbk:
9279 case X86::VDIVPDZ256rmbkz:
9280 case X86::VDIVPDZ256rmk:
9281 case X86::VDIVPDZ256rmkz:
9282 case X86::VDIVPDZ256rr:
9283 case X86::VDIVPDZ256rrk:
9284 case X86::VDIVPDZ256rrkz:
9285 case X86::VDIVPDZrrb:
9286 case X86::VDIVPDZrrbk:
9287 case X86::VDIVPDZrrbkz:
9288 case X86::VDIVPDZrm:
9289 case X86::VDIVPDZrmb:
9290 case X86::VDIVPDZrmbk:
9291 case X86::VDIVPDZrmbkz:
9292 case X86::VDIVPDZrmk:
9293 case X86::VDIVPDZrmkz:
9294 case X86::VDIVPDZrr:
9295 case X86::VDIVPDZrrk:
9296 case X86::VDIVPDZrrkz:
9297 case X86::VDIVPSZ128rm:
9298 case X86::VDIVPSZ128rmb:
9299 case X86::VDIVPSZ128rmbk:
9300 case X86::VDIVPSZ128rmbkz:
9301 case X86::VDIVPSZ128rmk:
9302 case X86::VDIVPSZ128rmkz:
9303 case X86::VDIVPSZ128rr:
9304 case X86::VDIVPSZ128rrk:
9305 case X86::VDIVPSZ128rrkz:
9306 case X86::VDIVPSZ256rm:
9307 case X86::VDIVPSZ256rmb:
9308 case X86::VDIVPSZ256rmbk:
9309 case X86::VDIVPSZ256rmbkz:
9310 case X86::VDIVPSZ256rmk:
9311 case X86::VDIVPSZ256rmkz:
9312 case X86::VDIVPSZ256rr:
9313 case X86::VDIVPSZ256rrk:
9314 case X86::VDIVPSZ256rrkz:
9315 case X86::VDIVPSZrrb:
9316 case X86::VDIVPSZrrbk:
9317 case X86::VDIVPSZrrbkz:
9318 case X86::VDIVPSZrm:
9319 case X86::VDIVPSZrmb:
9320 case X86::VDIVPSZrmbk:
9321 case X86::VDIVPSZrmbkz:
9322 case X86::VDIVPSZrmk:
9323 case X86::VDIVPSZrmkz:
9324 case X86::VDIVPSZrr:
9325 case X86::VDIVPSZrrk:
9326 case X86::VDIVPSZrrkz:
9327 case X86::VDIVSDZrm:
9328 case X86::VDIVSDZrr:
9329 case X86::VDIVSDZrm_Int:
9330 case X86::VDIVSDZrm_Intk:
9331 case X86::VDIVSDZrm_Intkz:
9332 case X86::VDIVSDZrr_Int:
9333 case X86::VDIVSDZrr_Intk:
9334 case X86::VDIVSDZrr_Intkz:
9335 case X86::VDIVSDZrrb_Int:
9336 case X86::VDIVSDZrrb_Intk:
9337 case X86::VDIVSDZrrb_Intkz:
9338 case X86::VDIVSSZrm:
9339 case X86::VDIVSSZrr:
9340 case X86::VDIVSSZrm_Int:
9341 case X86::VDIVSSZrm_Intk:
9342 case X86::VDIVSSZrm_Intkz:
9343 case X86::VDIVSSZrr_Int:
9344 case X86::VDIVSSZrr_Intk:
9345 case X86::VDIVSSZrr_Intkz:
9346 case X86::VDIVSSZrrb_Int:
9347 case X86::VDIVSSZrrb_Intk:
9348 case X86::VDIVSSZrrb_Intkz:
9349 case X86::VSQRTPDZ128m:
9350 case X86::VSQRTPDZ128mb:
9351 case X86::VSQRTPDZ128mbk:
9352 case X86::VSQRTPDZ128mbkz:
9353 case X86::VSQRTPDZ128mk:
9354 case X86::VSQRTPDZ128mkz:
9355 case X86::VSQRTPDZ128r:
9356 case X86::VSQRTPDZ128rk:
9357 case X86::VSQRTPDZ128rkz:
9358 case X86::VSQRTPDZ256m:
9359 case X86::VSQRTPDZ256mb:
9360 case X86::VSQRTPDZ256mbk:
9361 case X86::VSQRTPDZ256mbkz:
9362 case X86::VSQRTPDZ256mk:
9363 case X86::VSQRTPDZ256mkz:
9364 case X86::VSQRTPDZ256r:
9365 case X86::VSQRTPDZ256rk:
9366 case X86::VSQRTPDZ256rkz:
9367 case X86::VSQRTPDZm:
9368 case X86::VSQRTPDZmb:
9369 case X86::VSQRTPDZmbk:
9370 case X86::VSQRTPDZmbkz:
9371 case X86::VSQRTPDZmk:
9372 case X86::VSQRTPDZmkz:
9373 case X86::VSQRTPDZr:
9374 case X86::VSQRTPDZrb:
9375 case X86::VSQRTPDZrbk:
9376 case X86::VSQRTPDZrbkz:
9377 case X86::VSQRTPDZrk:
9378 case X86::VSQRTPDZrkz:
9379 case X86::VSQRTPSZ128m:
9380 case X86::VSQRTPSZ128mb:
9381 case X86::VSQRTPSZ128mbk:
9382 case X86::VSQRTPSZ128mbkz:
9383 case X86::VSQRTPSZ128mk:
9384 case X86::VSQRTPSZ128mkz:
9385 case X86::VSQRTPSZ128r:
9386 case X86::VSQRTPSZ128rk:
9387 case X86::VSQRTPSZ128rkz:
9388 case X86::VSQRTPSZ256m:
9389 case X86::VSQRTPSZ256mb:
9390 case X86::VSQRTPSZ256mbk:
9391 case X86::VSQRTPSZ256mbkz:
9392 case X86::VSQRTPSZ256mk:
9393 case X86::VSQRTPSZ256mkz:
9394 case X86::VSQRTPSZ256r:
9395 case X86::VSQRTPSZ256rk:
9396 case X86::VSQRTPSZ256rkz:
9397 case X86::VSQRTPSZm:
9398 case X86::VSQRTPSZmb:
9399 case X86::VSQRTPSZmbk:
9400 case X86::VSQRTPSZmbkz:
9401 case X86::VSQRTPSZmk:
9402 case X86::VSQRTPSZmkz:
9403 case X86::VSQRTPSZr:
9404 case X86::VSQRTPSZrb:
9405 case X86::VSQRTPSZrbk:
9406 case X86::VSQRTPSZrbkz:
9407 case X86::VSQRTPSZrk:
9408 case X86::VSQRTPSZrkz:
9409 case X86::VSQRTSDZm:
9410 case X86::VSQRTSDZm_Int:
9411 case X86::VSQRTSDZm_Intk:
9412 case X86::VSQRTSDZm_Intkz:
9413 case X86::VSQRTSDZr:
9414 case X86::VSQRTSDZr_Int:
9415 case X86::VSQRTSDZr_Intk:
9416 case X86::VSQRTSDZr_Intkz:
9417 case X86::VSQRTSDZrb_Int:
9418 case X86::VSQRTSDZrb_Intk:
9419 case X86::VSQRTSDZrb_Intkz:
9420 case X86::VSQRTSSZm:
9421 case X86::VSQRTSSZm_Int:
9422 case X86::VSQRTSSZm_Intk:
9423 case X86::VSQRTSSZm_Intkz:
9424 case X86::VSQRTSSZr:
9425 case X86::VSQRTSSZr_Int:
9426 case X86::VSQRTSSZr_Intk:
9427 case X86::VSQRTSSZr_Intkz:
9428 case X86::VSQRTSSZrb_Int:
9429 case X86::VSQRTSSZrb_Intk:
9430 case X86::VSQRTSSZrb_Intkz:
9432 case X86::VGATHERDPDYrm:
9433 case X86::VGATHERDPDZ128rm:
9434 case X86::VGATHERDPDZ256rm:
9435 case X86::VGATHERDPDZrm:
9436 case X86::VGATHERDPDrm:
9437 case X86::VGATHERDPSYrm:
9438 case X86::VGATHERDPSZ128rm:
9439 case X86::VGATHERDPSZ256rm:
9440 case X86::VGATHERDPSZrm:
9441 case X86::VGATHERDPSrm:
9442 case X86::VGATHERPF0DPDm:
9443 case X86::VGATHERPF0DPSm:
9444 case X86::VGATHERPF0QPDm:
9445 case X86::VGATHERPF0QPSm:
9446 case X86::VGATHERPF1DPDm:
9447 case X86::VGATHERPF1DPSm:
9448 case X86::VGATHERPF1QPDm:
9449 case X86::VGATHERPF1QPSm:
9450 case X86::VGATHERQPDYrm:
9451 case X86::VGATHERQPDZ128rm:
9452 case X86::VGATHERQPDZ256rm:
9453 case X86::VGATHERQPDZrm:
9454 case X86::VGATHERQPDrm:
9455 case X86::VGATHERQPSYrm:
9456 case X86::VGATHERQPSZ128rm:
9457 case X86::VGATHERQPSZ256rm:
9458 case X86::VGATHERQPSZrm:
9459 case X86::VGATHERQPSrm:
9460 case X86::VPGATHERDDYrm:
9461 case X86::VPGATHERDDZ128rm:
9462 case X86::VPGATHERDDZ256rm:
9463 case X86::VPGATHERDDZrm:
9464 case X86::VPGATHERDDrm:
9465 case X86::VPGATHERDQYrm:
9466 case X86::VPGATHERDQZ128rm:
9467 case X86::VPGATHERDQZ256rm:
9468 case X86::VPGATHERDQZrm:
9469 case X86::VPGATHERDQrm:
9470 case X86::VPGATHERQDYrm:
9471 case X86::VPGATHERQDZ128rm:
9472 case X86::VPGATHERQDZ256rm:
9473 case X86::VPGATHERQDZrm:
9474 case X86::VPGATHERQDrm:
9475 case X86::VPGATHERQQYrm:
9476 case X86::VPGATHERQQZ128rm:
9477 case X86::VPGATHERQQZ256rm:
9478 case X86::VPGATHERQQZrm:
9479 case X86::VPGATHERQQrm:
9480 case X86::VSCATTERDPDZ128mr:
9481 case X86::VSCATTERDPDZ256mr:
9482 case X86::VSCATTERDPDZmr:
9483 case X86::VSCATTERDPSZ128mr:
9484 case X86::VSCATTERDPSZ256mr:
9485 case X86::VSCATTERDPSZmr:
9486 case X86::VSCATTERPF0DPDm:
9487 case X86::VSCATTERPF0DPSm:
9488 case X86::VSCATTERPF0QPDm:
9489 case X86::VSCATTERPF0QPSm:
9490 case X86::VSCATTERPF1DPDm:
9491 case X86::VSCATTERPF1DPSm:
9492 case X86::VSCATTERPF1QPDm:
9493 case X86::VSCATTERPF1QPSm:
9494 case X86::VSCATTERQPDZ128mr:
9495 case X86::VSCATTERQPDZ256mr:
9496 case X86::VSCATTERQPDZmr:
9497 case X86::VSCATTERQPSZ128mr:
9498 case X86::VSCATTERQPSZ256mr:
9499 case X86::VSCATTERQPSZmr:
9500 case X86::VPSCATTERDDZ128mr:
9501 case X86::VPSCATTERDDZ256mr:
9502 case X86::VPSCATTERDDZmr:
9503 case X86::VPSCATTERDQZ128mr:
9504 case X86::VPSCATTERDQZ256mr:
9505 case X86::VPSCATTERDQZmr:
9506 case X86::VPSCATTERQDZ128mr:
9507 case X86::VPSCATTERQDZ256mr:
9508 case X86::VPSCATTERQDZmr:
9509 case X86::VPSCATTERQQZ128mr:
9510 case X86::VPSCATTERQQZ256mr:
9511 case X86::VPSCATTERQQZmr:
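// Divisions, square roots, and the gather/scatter/prefetch families listed
// above are classified as high-latency definitions, which steers the
// machine scheduler and related heuristics toward hiding their latency.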
9521 unsigned UseIdx) const {
9528 Inst.getNumDefs() <= 2 && "Reassociation needs binary operators");
9537 assert((Inst.getNumDefs() == 1 || FlagDef) && "Implicit def isn't flags?");
9538 if (FlagDef && !FlagDef->isDead())
9549 bool Invert) const {
9601 case X86::VPANDDZ128rr:
9602 case X86::VPANDDZ256rr:
9603 case X86::VPANDDZrr:
9604 case X86::VPANDQZ128rr:
9605 case X86::VPANDQZ256rr:
9606 case X86::VPANDQZrr:
9609 case X86::VPORDZ128rr:
9610 case X86::VPORDZ256rr:
9612 case X86::VPORQZ128rr:
9613 case X86::VPORQZ256rr:
9617 case X86::VPXORDZ128rr:
9618 case X86::VPXORDZ256rr:
9619 case X86::VPXORDZrr:
9620 case X86::VPXORQZ128rr:
9621 case X86::VPXORQZ256rr:
9622 case X86::VPXORQZrr:
9625 case X86::VANDPDYrr:
9626 case X86::VANDPSYrr:
9627 case X86::VANDPDZ128rr:
9628 case X86::VANDPSZ128rr:
9629 case X86::VANDPDZ256rr:
9630 case X86::VANDPSZ256rr:
9631 case X86::VANDPDZrr:
9632 case X86::VANDPSZrr:
9637 case X86::VORPDZ128rr:
9638 case X86::VORPSZ128rr:
9639 case X86::VORPDZ256rr:
9640 case X86::VORPSZ256rr:
9645 case X86::VXORPDYrr:
9646 case X86::VXORPSYrr:
9647 case X86::VXORPDZ128rr:
9648 case X86::VXORPSZ128rr:
9649 case X86::VXORPDZ256rr:
9650 case X86::VXORPSZ256rr:
9651 case X86::VXORPDZrr:
9652 case X86::VXORPSZrr:
9673 case X86::VPADDBYrr:
9674 case X86::VPADDWYrr:
9675 case X86::VPADDDYrr:
9676 case X86::VPADDQYrr:
9677 case X86::VPADDBZ128rr:
9678 case X86::VPADDWZ128rr:
9679 case X86::VPADDDZ128rr:
9680 case X86::VPADDQZ128rr:
9681 case X86::VPADDBZ256rr:
9682 case X86::VPADDWZ256rr:
9683 case X86::VPADDDZ256rr:
9684 case X86::VPADDQZ256rr:
9685 case X86::VPADDBZrr:
9686 case X86::VPADDWZrr:
9687 case X86::VPADDDZrr:
9688 case X86::VPADDQZrr:
9689 case X86::VPMULLWrr:
9690 case X86::VPMULLWYrr:
9691 case X86::VPMULLWZ128rr:
9692 case X86::VPMULLWZ256rr:
9693 case X86::VPMULLWZrr:
9694 case X86::VPMULLDrr:
9695 case X86::VPMULLDYrr:
9696 case X86::VPMULLDZ128rr:
9697 case X86::VPMULLDZ256rr:
9698 case X86::VPMULLDZrr:
9699 case X86::VPMULLQZ128rr:
9700 case X86::VPMULLQZ256rr:
9701 case X86::VPMULLQZrr:
9702 case X86::VPMAXSBrr:
9703 case X86::VPMAXSBYrr:
9704 case X86::VPMAXSBZ128rr:
9705 case X86::VPMAXSBZ256rr:
9706 case X86::VPMAXSBZrr:
9707 case X86::VPMAXSDrr:
9708 case X86::VPMAXSDYrr:
9709 case X86::VPMAXSDZ128rr:
9710 case X86::VPMAXSDZ256rr:
9711 case X86::VPMAXSDZrr:
9712 case X86::VPMAXSQZ128rr:
9713 case X86::VPMAXSQZ256rr:
9714 case X86::VPMAXSQZrr:
9715 case X86::VPMAXSWrr:
9716 case X86::VPMAXSWYrr:
9717 case X86::VPMAXSWZ128rr:
9718 case X86::VPMAXSWZ256rr:
9719 case X86::VPMAXSWZrr:
9720 case X86::VPMAXUBrr:
9721 case X86::VPMAXUBYrr:
9722 case X86::VPMAXUBZ128rr:
9723 case X86::VPMAXUBZ256rr:
9724 case X86::VPMAXUBZrr:
9725 case X86::VPMAXUDrr:
9726 case X86::VPMAXUDYrr:
9727 case X86::VPMAXUDZ128rr:
9728 case X86::VPMAXUDZ256rr:
9729 case X86::VPMAXUDZrr:
9730 case X86::VPMAXUQZ128rr:
9731 case X86::VPMAXUQZ256rr:
9732 case X86::VPMAXUQZrr:
9733 case X86::VPMAXUWrr:
9734 case X86::VPMAXUWYrr:
9735 case X86::VPMAXUWZ128rr:
9736 case X86::VPMAXUWZ256rr:
9737 case X86::VPMAXUWZrr:
9738 case X86::VPMINSBrr:
9739 case X86::VPMINSBYrr:
9740 case X86::VPMINSBZ128rr:
9741 case X86::VPMINSBZ256rr:
9742 case X86::VPMINSBZrr:
9743 case X86::VPMINSDrr:
9744 case X86::VPMINSDYrr:
9745 case X86::VPMINSDZ128rr:
9746 case X86::VPMINSDZ256rr:
9747 case X86::VPMINSDZrr:
9748 case X86::VPMINSQZ128rr:
9749 case X86::VPMINSQZ256rr:
9750 case X86::VPMINSQZrr:
9751 case X86::VPMINSWrr:
9752 case X86::VPMINSWYrr:
9753 case X86::VPMINSWZ128rr:
9754 case X86::VPMINSWZ256rr:
9755 case X86::VPMINSWZrr:
9756 case X86::VPMINUBrr:
9757 case X86::VPMINUBYrr:
9758 case X86::VPMINUBZ128rr:
9759 case X86::VPMINUBZ256rr:
9760 case X86::VPMINUBZrr:
9761 case X86::VPMINUDrr:
9762 case X86::VPMINUDYrr:
9763 case X86::VPMINUDZ128rr:
9764 case X86::VPMINUDZ256rr:
9765 case X86::VPMINUDZrr:
9766 case X86::VPMINUQZ128rr:
9767 case X86::VPMINUQZ256rr:
9768 case X86::VPMINUQZrr:
9769 case X86::VPMINUWrr:
9770 case X86::VPMINUWYrr:
9771 case X86::VPMINUWZ128rr:
9772 case X86::VPMINUWZ256rr:
9773 case X86::VPMINUWZrr:
9785 case X86::VMAXCPDrr:
9786 case X86::VMAXCPSrr:
9787 case X86::VMAXCPDYrr:
9788 case X86::VMAXCPSYrr:
9789 case X86::VMAXCPDZ128rr:
9790 case X86::VMAXCPSZ128rr:
9791 case X86::VMAXCPDZ256rr:
9792 case X86::VMAXCPSZ256rr:
9793 case X86::VMAXCPDZrr:
9794 case X86::VMAXCPSZrr:
9795 case X86::VMAXCSDrr:
9796 case X86::VMAXCSSrr:
9797 case X86::VMAXCSDZrr:
9798 case X86::VMAXCSSZrr:
9799 case X86::VMINCPDrr:
9800 case X86::VMINCPSrr:
9801 case X86::VMINCPDYrr:
9802 case X86::VMINCPSYrr:
9803 case X86::VMINCPDZ128rr:
9804 case X86::VMINCPSZ128rr:
9805 case X86::VMINCPDZ256rr:
9806 case X86::VMINCPSZ256rr:
9807 case X86::VMINCPDZrr:
9808 case X86::VMINCPSZrr:
9809 case X86::VMINCSDrr:
9810 case X86::VMINCSSrr:
9811 case X86::VMINCSDZrr:
9812 case X86::VMINCSSZrr:
9813 case X86::VMAXCPHZ128rr:
9814 case X86::VMAXCPHZ256rr:
9815 case X86::VMAXCPHZrr:
9816 case X86::VMAXCSHZrr:
9817 case X86::VMINCPHZ128rr:
9818 case X86::VMINCPHZ256rr:
9819 case X86::VMINCPHZrr:
9820 case X86::VMINCSHZrr:
9832 case X86::VADDPDYrr:
9833 case X86::VADDPSYrr:
9834 case X86::VADDPDZ128rr:
9835 case X86::VADDPSZ128rr:
9836 case X86::VADDPDZ256rr:
9837 case X86::VADDPSZ256rr:
9838 case X86::VADDPDZrr:
9839 case X86::VADDPSZrr:
9842 case X86::VADDSDZrr:
9843 case X86::VADDSSZrr:
9846 case X86::VMULPDYrr:
9847 case X86::VMULPSYrr:
9848 case X86::VMULPDZ128rr:
9849 case X86::VMULPSZ128rr:
9850 case X86::VMULPDZ256rr:
9851 case X86::VMULPSZ256rr:
9852 case X86::VMULPDZrr:
9853 case X86::VMULPSZrr:
9856 case X86::VMULSDZrr:
9857 case X86::VMULSSZrr:
9858 case X86::VADDPHZ128rr:
9859 case X86::VADDPHZ256rr:
9860 case X86::VADDPHZrr:
9861 case X86::VADDSHZrr:
9862 case X86::VMULPHZ128rr:
9863 case X86::VMULPHZ256rr:
9864 case X86::VMULPHZrr:
9865 case X86::VMULSHZrr:
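// The integer add/mul/min/max opcodes and the "C" (commutative) FP min/max
// variants above are unconditionally reassociable; the plain FP add/mul
// cases are only safe to reassociate when the instruction carries the
// appropriate fast-math flags, since FP arithmetic is not associative in
// general.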
9876static std::optional<ParamLoadedValue>
9879 Register DestReg = MI.getOperand(0).getReg();
9885 if (DestReg == DescribedReg)
9890 if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) {
9891 Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
9901 if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr ||
9902     !TRI->isSuperRegister(DestReg, DescribedReg))
9903 return std::nullopt;
9905 assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case");
9909std::optional<ParamLoadedValue>
9916 switch (MI.getOpcode()) {
9919 case X86::LEA64_32r: {
9921 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9922 return std::nullopt;
9926 if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
9927 return std::nullopt;
9936 if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) ||
9937     Op2.getReg() == MI.getOperand(0).getReg())
9938 return std::nullopt;
9939 else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister &&
9940           TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) ||
9941          (Op2.getReg() != X86::NoRegister &&
9942           TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg())))
9943 return std::nullopt;
9945 int64_t Coef = MI.getOperand(2).getImm();
9946 int64_t Offset = MI.getOperand(4).getImm();
9949 if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) {
9951 } else if (Op1.isFI())
9954 if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) {
9959 if (Op && Op2.getReg() != X86::NoRegister) {
9960 int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false);
9962 return std::nullopt;
9963 else if (dwarfReg < 32) {
9964 Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg);
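// DWARF only defines the compact single-byte ops DW_OP_breg0..DW_OP_breg31;
// registers with DWARF numbers of 32 or more would need DW_OP_bregx, which
// is why the dwarfReg < 32 guard precedes this encoding.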
9983 if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) &&
9984     Op2.getReg() != X86::NoRegister) {
9997 return std::nullopt;
10000 case X86::MOV64ri32:
10003 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10004 return std::nullopt;
10011 case X86::XOR32rr: {
10014 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10015 return std::nullopt;
10016 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
10018 return std::nullopt;
10020 case X86::MOVSX64rr32: {
10027 if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg))
10028 return std::nullopt;
10037 if (Reg == MI.getOperand(0).getReg())
10040 assert(X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg) &&
10041        "Unhandled sub-register case for MOVSX64rr32");
10046 assert(!MI.isMoveImmediate() && "Unexpected MoveImm instruction");
10061 assert(!OldFlagDef1 == !OldFlagDef2 &&
10062 "Unexpected instruction type for reassociation");
10064 if (!OldFlagDef1 || !OldFlagDef2)
10068 "Must have dead EFLAGS operand in reassociable instruction");
10073 assert(NewFlagDef1 && NewFlagDef2 &&
10074 "Unexpected operand in reassociable instruction");
10084std::pair<unsigned, unsigned>
10086 return std::make_pair(TF, 0u);
10091 using namespace X86II;
10092 static const std::pair<unsigned, const char *> TargetFlags[] = {
10093 {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
10094 {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
10095 {MO_GOT, "x86-got"},
10096 {MO_GOTOFF, "x86-gotoff"},
10097 {MO_GOTPCREL, "x86-gotpcrel"},
10098 {MO_GOTPCREL_NORELAX, "x86-gotpcrel-norelax"},
10099 {MO_PLT, "x86-plt"},
10100 {MO_TLSGD, "x86-tlsgd"},
10101 {MO_TLSLD, "x86-tlsld"},
10102 {MO_TLSLDM, "x86-tlsldm"},
10103 {MO_GOTTPOFF, "x86-gottpoff"},
10104 {MO_INDNTPOFF, "x86-indntpoff"},
10105 {MO_TPOFF, "x86-tpoff"},
10106 {MO_DTPOFF, "x86-dtpoff"},
10107 {MO_NTPOFF, "x86-ntpoff"},
10108 {MO_GOTNTPOFF, "x86-gotntpoff"},
10109 {MO_DLLIMPORT, "x86-dllimport"},
10110 {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
10111 {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
10112 {MO_TLVP, "x86-tlvp"},
10113 {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
10114 {MO_SECREL, "x86-secrel"},
10115 {MO_COFFSTUB, "x86-coffstub"}};
10132 if (!TM->isPositionIndependent())
10139 if (GlobalBaseReg == 0)
10151 PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
10153 PC = GlobalBaseReg;
10155 if (STI.is64Bit()) {
10208 StringRef getPassName() const override {
10209 return "X86 PIC Global Base Reg Initialization";
10248 bool Changed = false;
10253 switch (I->getOpcode()) {
10254 case X86::TLS_base_addr32:
10255 case X86::TLS_base_addr64:
10256 if (TLSBaseAddrReg)
10257 I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
10259 I = SetRegister(*I, &TLSBaseAddrReg);
10268 for (auto &I : *Node) {
10269 Changed |= VisitNode(I, TLSBaseAddrReg);
10278 unsigned TLSBaseAddrReg) {
10281 const bool is64Bit = STI.is64Bit();
10287 TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
10288     .addReg(TLSBaseAddrReg);
10291 I.eraseFromParent();
10301 const bool is64Bit = STI.is64Bit();
10306 *TLSBaseAddrReg = RegInfo.createVirtualRegister(
10307 is64Bit ? &X86::GR64RegClass : &X86::GR32RegClass);
10312 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
10318 StringRef getPassName() const override {
10319 return "Local Dynamic TLS Access Clean-up";
10330char LDTLSCleanup::ID = 0;
10332 return new LDTLSCleanup();
10365std::optional<outliner::OutlinedFunction>
10367 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
10368 unsigned SequenceSize = 0;
10369 for (auto &MI : RepeatedSequenceLocs[0]) {
10373 if (MI.isDebugInstr() || MI.isKill())
10380 unsigned CFICount = 0;
10381 for (auto &I : RepeatedSequenceLocs[0]) {
10382 if (I.isCFIInstruction())
10392 std::vector<MCCFIInstruction> CFIInstructions =
10393 C.getMF()->getFrameInstructions();
10395 if (CFICount > 0 && CFICount != CFIInstructions.size())
10396 return std::nullopt;
10400 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10411 return std::nullopt;
10435 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10444 unsigned Flags) const {
10448 if (MI.isTerminator())
10462 if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
10463     MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
10464     MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
10468 if (MI.readsRegister(X86::RIP, &RI) ||
10469     MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
10470     MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
10474 if (MI.isCFIInstruction())
10500 .addGlobalAddress(M.getNamedValue(MF.getName())));
10504 .addGlobalAddress(M.getNamedValue(MF.getName())));
10513 bool AllowSideEffects) const {
10518 if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
10522 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10527 if (!AllowSideEffects)
10534 } else if (X86::VR128RegClass.contains(Reg)) {
10543 } else if (X86::VR256RegClass.contains(Reg)) {
10552 } else if (X86::VR512RegClass.contains(Reg)) {
10554 if (!ST.hasAVX512())
10561 } else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
10563            X86::VK16RegClass.contains(Reg)) {
10568 unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
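// Register clearing picks the cheapest zeroing idiom per class: XOR for
// general-purpose registers (subject to the AllowSideEffects check above,
// since XOR clobbers EFLAGS), XOR-style vector instructions for the
// 128/256/512-bit classes, and KXOR for AVX-512 mask registers, using KXORQ
// when AVX512BW provides 64-bit masks.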
10577 bool DoRegPressureReduce) const {
10580 case X86::VPDPWSSDrr:
10581 case X86::VPDPWSSDrm:
10582 case X86::VPDPWSSDYrr:
10583 case X86::VPDPWSSDYrm: {
10584 if (!Subtarget.hasFastDPWSSD()) {
10590 case X86::VPDPWSSDZ128r:
10591 case X86::VPDPWSSDZ128m:
10592 case X86::VPDPWSSDZ256r:
10593 case X86::VPDPWSSDZ256m:
10594 case X86::VPDPWSSDZr:
10595 case X86::VPDPWSSDZm: {
10596 if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10604 Patterns, DoRegPressureReduce);
10616 unsigned AddOpc = 0;
10617 unsigned MaddOpc = 0;
10620 assert(false && "It should not reach here");
10626 case X86::VPDPWSSDrr:
10627 MaddOpc = X86::VPMADDWDrr;
10628 AddOpc = X86::VPADDDrr;
10630 case X86::VPDPWSSDrm:
10631 MaddOpc = X86::VPMADDWDrm;
10632 AddOpc = X86::VPADDDrr;
10634 case X86::VPDPWSSDZ128r:
10635 MaddOpc = X86::VPMADDWDZ128rr;
10636 AddOpc = X86::VPADDDZ128rr;
10638 case X86::VPDPWSSDZ128m:
10639 MaddOpc = X86::VPMADDWDZ128rm;
10640 AddOpc = X86::VPADDDZ128rr;
10646 case X86::VPDPWSSDYrr:
10647 MaddOpc = X86::VPMADDWDYrr;
10648 AddOpc = X86::VPADDDYrr;
10650 case X86::VPDPWSSDYrm:
10651 MaddOpc = X86::VPMADDWDYrm;
10652 AddOpc = X86::VPADDDYrr;
10654 case X86::VPDPWSSDZ256r:
10655 MaddOpc = X86::VPMADDWDZ256rr;
10656 AddOpc = X86::VPADDDZ256rr;
10658 case X86::VPDPWSSDZ256m:
10659 MaddOpc = X86::VPMADDWDZ256rm;
10660 AddOpc = X86::VPADDDZ256rr;
10666 case X86::VPDPWSSDZr:
10667 MaddOpc = X86::VPMADDWDZrr;
10668 AddOpc = X86::VPADDDZrr;
10670 case X86::VPDPWSSDZm:
10671 MaddOpc = X86::VPMADDWDZrm;
10672 AddOpc = X86::VPADDDZrr;
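// All of these mappings implement the identity
//   vpdpwssd(Acc, A, B) == vpaddd(Acc, vpmaddwd(A, B)):
// VPMADDWD computes the horizontal 16x16->32 products-and-sums and VPADDD
// folds them into the accumulator, which is profitable on subtargets
// without fast VPDPWSSD.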
10684 InstrIdxForVirtReg.insert(std::make_pair(NewReg, 0));
10706 DelInstrs, InstrIdxForVirtReg);
10710 InstrIdxForVirtReg);
10720 M.Base.FrameIndex = FI;
10721 M.getFullAddress(Ops);
10724#define GET_INSTRINFO_HELPERS
10725#include "X86GenInstrInfo.inc"
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
Expand a single-def pseudo instruction to a two-addr instruction with two undef reads of the register...
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static bool is64Bit(const char *name)
#define GET_EGPR_IF_ENABLED(OPC)
#define FROM_TO(FROM, TO, IDX1, IDX2)
static bool isLEA(unsigned Opcode)
static void addOperands(MachineInstrBuilder &MIB, ArrayRef< MachineOperand > MOs, int PtrOffset=0)
static std::optional< ParamLoadedValue > describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetRegisterInfo *TRI)
If DescribedReg overlaps with the MOVrr instruction's destination register then, if possible,...
static cl::opt< unsigned > PartialRegUpdateClearance("partial-reg-update-clearance", cl::desc("Clearance between two register writes " "for inserting XOR to avoid partial " "register update"), cl::init(64), cl::Hidden)
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI)
static bool isConvertibleLEA(MachineInstr *MI)
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, const X86Subtarget &Subtarget)
static bool isAMXOpcode(unsigned Opc)
static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI, Register Reg)
static void updateOperandRegConstraints(MachineFunction &MF, MachineInstr &NewMI, const TargetInstrInfo &TII)
static bool findRedundantFlagInstr(MachineInstr &CmpInstr, MachineInstr &CmpValDefInstr, const MachineRegisterInfo *MRI, MachineInstr **AndInstr, const TargetRegisterInfo *TRI, bool &NoSignFlag, bool &ClearsOverflowFlag)
static int getJumpTableIndexFromAddr(const MachineInstr &MI)
static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, unsigned NewWidth, unsigned *pNewMask=nullptr)
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne)
static unsigned getStoreRegOpcode(Register SrcReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
#define FOLD_BROADCAST(SIZE)
static cl::opt< unsigned > UndefRegClearance("undef-reg-clearance", cl::desc("How many idle instructions we would like before " "certain undef register reads"), cl::init(128), cl::Hidden)
#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64)
static bool isTruncatedShiftCountForLEA(unsigned ShAmt)
Check whether the given shift count is appropriate can be represented by a LEA instruction.
static cl::opt< bool > ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden)
static SmallVector< MachineMemOperand *, 2 > extractLoadMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx)
static bool canConvert2Copy(unsigned Opc)
static MachineInstr * FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII)
static cl::opt< bool > NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions"), cl::Hidden)
static bool expandNOVLXStore(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx)
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes)
static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc, Register Reg)
Expand a single-def pseudo instruction to a two-addr instruction with two k0 reads.
#define VPERM_CASES_BROADCAST(Suffix)
static X86::CondCode isUseDefConvertible(const MachineInstr &MI)
Check whether the use can be converted to remove a comparison against zero.
static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
static unsigned getLoadRegOpcode(Register DestReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, bool ForLoadFold=false)
static MachineInstr * MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI)
#define GET_ND_IF_ENABLED(OPC)
static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget, bool ForLoadFold=false)
Return true for all instructions that only update the first 32 or 64-bits of the destination register...
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, const X86Subtarget &Subtarget)
static const uint16_t * lookupAVX512(unsigned opcode, unsigned domain, ArrayRef< uint16_t[4]> Table)
static MachineInstr * FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII, int PtrOffset=0)
static unsigned getLoadStoreRegOpcode(Register Reg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI, bool Load)
#define VPERM_CASES(Suffix)
#define FROM_TO_SIZE(A, B, S)
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes)
static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, bool &ClearsOverflowFlag)
Check whether the definition can be converted to remove a comparison against zero.
static bool isHReg(unsigned Reg)
Test if the given register is a physical h register.
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode)
static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static MachineBasicBlock * getFallThroughMBB(MachineBasicBlock *MBB, MachineBasicBlock *TBB)
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineInstr &UserMI, const MachineFunction &MF)
Check if LoadMI is a partial register load that we can't fold into MI because the latter uses content...
static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI)
static cl::opt< bool > PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to" " fuse, but the X86 backend currently can't"), cl::Hidden)
static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx)
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
This determines which of three possible cases of a three source commute the source indexes correspond...
static bool isX87Reg(unsigned Reg)
Return true if the Reg is X87 register.
static void genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
static unsigned getTruncatedShiftCount(const MachineInstr &MI, unsigned ShiftAmtOperandIdx)
Check whether the shift count for a machine operand is non-zero.
static SmallVector< MachineMemOperand *, 2 > extractStoreMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static unsigned getBroadcastOpcode(const X86FoldTableEntry *I, const TargetRegisterClass *RC, const X86Subtarget &STI)
static unsigned convertALUrr2ALUri(unsigned Opc)
Convert an ALUrr opcode to corresponding ALUri opcode.
static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI)
Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
static bool isCommutableVPERMV3Instruction(unsigned Opcode)
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
static DIExpression * appendExt(const DIExpression *Expr, unsigned FromSize, unsigned ToSize, bool Signed)
Append a zero- or sign-extension to Expr.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Base class for the actual dominator tree node.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
LiveInterval - This class represents the liveness of a register, or stack slot.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
void stepForward(const MachineInstr &MI, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand * > > &Clobbers)
Simulates liveness when stepping forward over an instruction(bundle).
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
bool usesWindowsCFI() const
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Instances of this class represent a single low-level machine instruction.
void setOpcode(unsigned Op)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
Wrapper class representing physical registers. Should be passed by value.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned pred_size() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
@ LQR_Dead
Register is known to be fully dead.
This class is a data container for one entry in a MachineConstantPool.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
const Constant * ConstVal
union llvm::MachineConstantPoolEntry::@195 Val
The constant itself.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * getRootNode() const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_iterator operands_begin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isSafeToMove(AAResults *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
void dropDebugNumber()
Drop any variable location debugging information associated with this instruction.
void setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just prior to the instruction itself.
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr modifies (fully defines or partially defines) the specified register.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessary.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void setFlag(MIFlag Flag)
Set a MI flag.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
unsigned getNumDefs() const
Returns the total number of definitions.
MachineOperand * findRegisterDefOperand(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an index.
void setDebugLoc(DebugLoc DL)
Replace the current source location information with the given one.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
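A small sketch of in-place operand surgery with the mutators above (MI, OldReg, and NewReg are assumed; operand 0 is used only for illustration):
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg() == OldReg)
    MO.ChangeToRegister(NewReg, /*isDef=*/MO.isDef());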
static MachineOperand CreateImm(int64_t Val)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).
static MachineOperand CreateCPI(unsigned Idx, int Offset, unsigned TargetFlags=0)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
static MachineOperand CreateFI(int Idx)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
const TargetRegisterInfo * getTargetRegisterInfo() const
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class.
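For example, a sketch of creating and then constraining a virtual register (MF is assumed; the class choice is illustrative):
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register Tmp = MRI.createVirtualRegister(&X86::GR32RegClass);
  // Narrow Tmp's class if a user instruction disallows the stack pointer.
  MRI.constrainRegClass(Tmp, &X86::GR32_NOSPRegClass);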
A Module instance is used to store all the information related to an LLVM module.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
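A sketch of the usual dispatch on the Register wrapper (MRI, TRI, and Reg are assumed to be in scope):
  const TargetRegisterClass *RC = nullptr;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);             // class recorded for the vreg
  else if (Reg.isPhysical())
    RC = TRI->getMinimalPhysRegClass(Reg); // smallest class containing it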
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
MachineFunction & getMachineFunction() const
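A hedged sketch of the node-creation path these entries describe, e.g. materializing a machine node for a load and attaching its memory operands (DAG, dl, Opc, VT, Ops, and MMOs are assumed):
  MachineSDNode *N = DAG.getMachineNode(Opc, dl, VT, MVT::Other, Ops);
  DAG.setNodeMemRefs(N, MMOs);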
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const
Return true when Inst has reassociable operands in the same MBB.
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual void genAlternativeCodeSequence(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could replace the original code sequence.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target specify whether the instruction is actually trivially rematerializable, taking into consideration its operands.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum, or NULL.
bool isPositionIndependent() const
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TypeSize getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
SlotIndex def
The index of the defining instruction.
LLVM Value Representation.
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack function) as part of its ABI.
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void getFrameIndexOperands(SmallVectorImpl< MachineOperand > &Ops, int FI) const override
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
Check if there exists an earlier instruction that operates on the same source operands and sets eflags in the same way as this compare.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Overrides the isSchedulingBoundary from Codegen/TargetInstrInfo.cpp to make it capable of identifying ENDBR instructions and treating them as scheduling boundaries.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
Given a machine instruction descriptor, returns the register class constraint for OpNum, or NULL.
void replaceBranchWithTailCall(MachineBasicBlock &MBB, SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the global base register value.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Returns true iff the routine could find two commutable operands in the given machine instruction.
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value loaded from memory or the value of any non-address register operands.
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
const X86RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override
Returns true if we have preference on the operands order in MI; the commute decision is returned in Commute.
bool hasLiveCondCodeDef(MachineInstr &MI) const
True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool canMakeTailCallConditional(SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
convertToThreeAddress - This method must be implemented by targets that set the M_CONVERTIBLE_TO_3_ADDR flag.
X86InstrInfo(X86Subtarget &STI)
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
MCInst getNop() const override
Return the noop instruction to use for a noop.
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
This is used by the pre-regalloc scheduler to determine (in conjunction with areLoadsFromSameBasePtr) if two loads should be scheduled together.
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr * > &NewMIs) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
Fold a load or store of the specified stack slot into the specified machine instruction for the specified operand(s).
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns, bool DoRegPressureReduce) const override
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isStoreToStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
bool isUnconditionalTailCall(const MachineInstr &MI) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isLoadFromStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const
int getSPAdjust(const MachineInstr &MI) const override
getSPAdjust - This returns the stack pointer adjustment made by this instruction.
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override
int getJumpTableIndex(const MachineInstr &MI) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override
This is an architecture-specific helper function of reassociateOps.
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
isCoalescableExtInstr - Return true if the instruction is a "coalescable" extension instruction.
void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Opc, Register Reg, int FrameIdx, bool isKill=false) const
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned LEAOpcode, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV, LiveIntervals *LIS) const
Given an operand within a MachineInstr, insert preceding code to put it into the right format for a p...
bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify=false) const override
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value of any of its register operands.
unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before certain undef register reads.
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override
uint16_t getExecutionDomainCustom(const MachineInstr &MI) const
bool isHighLatencyDef(int opc) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immediate into the use instruction.
void genAlternativeCodeSequence(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds potential patterns, this function generates the instructions that could replace the original code sequence.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, const X86InstrFMA3Group &FMA3Group) const
Returns an adjusted FMA opcode that must be used in an FMA instruction that performs the same computations as the given MI but which has the operands SrcOpIdx1 and SrcOpIdx2 commuted.
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
bool preservesZeroValueInReg(const MachineInstr *MI, const Register NullValueReg, const TargetRegisterInfo *TRI) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before a partial register update.
MachineInstr * optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, Register &FoldAsLoadDefReg, MachineInstr *&DefMI) const override
Try to remove the load by folding it to a register operand at the use.
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-specific information for each MachineFunction.
Register getGlobalBaseReg() const
int getTCReturnAddrDelta() const
void setGlobalBaseReg(Register Reg)
unsigned getNumLocalDynamicTLSAccesses() const
bool getUsesRedZone() const
bool canRealignStack(const MachineFunction &MF) const override
bool isPICStyleGOT() const
bool isTargetWin64() const
const X86InstrInfo * getInstrInfo() const override
const X86RegisterInfo * getRegisterInfo() const override
const X86FrameLowering * getFrameLowering() const override
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ X86
Windows x64, Windows Itanium (IA-64)
Reg
All possible values of the reg field in the ModR/M byte.
bool isKMergeMasked(uint64_t TSFlags)
@ MO_GOT_ABSOLUTE_ADDRESS
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [.
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the GOT entry with the negative thread-pointer offset for the symbol.
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with the negative thread-pointer offset for the symbol.
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with the thread-pointer offset for the symbol.
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of the symbol minus the PIC base label.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is the offset to the GOT entry for the symbol name from the current code location.
@ EVEX
EVEX - Specifies that this instruction uses EVEX form which provides syntax support up to 32 512-bit register operands and up to 7 new instruction forms.
@ SSEDomainShift
Execution domain for SSE instructions.
bool canUseApxExtendedReg(const MCInstrDesc &Desc)
bool isPseudo(uint64_t TSFlags)
bool isKMasked(uint64_t TSFlags)
int getMemoryOperandNo(uint64_t TSFlags)
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
CondCode getCondFromBranch(const MachineInstr &MI)
CondCode getCondFromCFCMov(const MachineInstr &MI)
CondCode getCondFromMI(const MachineInstr &MI)
Return the condition code of the instruction.
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference, or -1 if it has none.
unsigned getSwappedVCMPImm(unsigned Imm)
Get the VCMP immediate if the opcodes are swapped.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g. turning COND_E to COND_NE.
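A sketch of inverting a conditional branch with these helpers (MI is assumed to be a JCC instruction):
  X86::CondCode CC = X86::getCondFromBranch(MI);
  if (CC != X86::COND_INVALID)
    CC = X86::GetOppositeBranchCondition(CC); // e.g. COND_E -> COND_NE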
unsigned getSwappedVPCOMImm(unsigned Imm)
Get the VPCOM immediate if the opcodes are swapped.
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is an X87 instruction.
unsigned getVPCMPImmForCond(ISD::CondCode CC)
Get the VPCMP immediate for the given condition.
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should be swapped.
CondCode getCondFromSETCC(const MachineInstr &MI)
unsigned getSwappedVPCMPImm(unsigned Imm)
Get the VPCMP immediate if the opcodes are swapped.
int getCondSrcNoFromDesc(const MCInstrDesc &MCID)
Return the source operand # for condition code by MCID.
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false, bool HasNDD=false)
Return a cmov opcode for the given register size in bytes, and operand type.
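For instance, selecting the plain register form of a 32-bit CMOV (no memory operand, no NDD), relying on the defaulted parameters:
  unsigned Opc = X86::getCMovOpcode(/*RegBytes=*/4);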
unsigned getVectorRegisterWidth(const MCOperandInfo &Info)
Get the width of the vector register operand.
CondCode getCondFromCMov(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
static bool isMem(const MachineInstr &MI, unsigned Op)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
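A sketch of mapping between register widths with this helper, e.g. recovering the 8-bit low and high parts of EAX:
  MCRegister Low = getX86SubSuperRegister(X86::EAX, 8);                 // AL
  MCRegister High = getX86SubSuperRegister(X86::EAX, 8, /*High=*/true); // AH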
bool getAlign(const Function &F, unsigned index, unsigned &align)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FunctionPass * createX86GlobalBaseRegPass()
This pass initializes a global base register for PIC on x86-32.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
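For example, the familiar load-from-stack-slot pattern built with this helper (MBB, MI, DL, TII, DestReg, and FrameIndex are assumed):
  addFrameReference(
      BuildMI(MBB, MI, DL, TII->get(X86::MOV32rm), DestReg), FrameIndex);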
FunctionPass * createCleanupLocalDynamicTLSPass()
This pass combines multiple accesses to local-dynamic TLS variables so that the TLS base address for the module is only fetched once per execution path through the function.
const X86FoldTableEntry * lookupBroadcastFoldTable(unsigned RegOp, unsigned OpNum)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
const X86InstrFMA3Group * getFMA3Group(unsigned Opcode, uint64_t TSFlags)
Returns a reference to the group of FMA3 opcodes that contains the given Opcode.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset], i.e., one with no scale or index, but with a displacement. An example is: DWORD PTR [EAX + 4].
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
const X86FoldTableEntry * lookupTwoAddrFoldTable(unsigned RegOp)
MachineCombinerPattern
These are instruction patterns matched by the machine combiner pass.
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
unsigned getUndefRegState(bool B)
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
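These flag helpers compose with bitwise-or when rebuilding operands; a sketch (MIB, Reg, and the Is* booleans are assumed):
  MIB.addReg(Reg, getDefRegState(IsDef) | getKillRegState(IsKill) |
                      getDeadRegState(IsDead) | getUndefRegState(IsUndef));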
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
const X86FoldTableEntry * lookupUnfoldTable(unsigned MemOp)
constexpr unsigned BitWidth
bool matchBroadcastSize(const X86FoldTableEntry &Entry, unsigned BroadcastBits)
const X86FoldTableEntry * lookupFoldTable(unsigned RegOp, unsigned OpNum)
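A sketch of consulting the folding tables before rewriting an instruction; the entry's field layout is not shown here, so only the lookup is illustrated (MI and OpNum assumed):
  if (const X86FoldTableEntry *Entry =
          lookupFoldTable(MI.getOpcode(), OpNum)) {
    // Entry describes the memory form of MI's opcode for operand OpNum;
    // a folded load/store instruction can be built from it.
  }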
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
This represents a simple continuous liveness interval for a value.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstructions which are the last use of this virtual register (kill it) in their basic block.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
X86AddressMode - This struct holds a generalized full x86 address mode.
This class is used to group {132, 213, 231} forms of FMA opcodes together.
unsigned get213Opcode() const
Returns the 213 form of FMA opcode.
unsigned get231Opcode() const
Returns the 231 form of FMA opcode.
bool isIntrinsic() const
Returns true iff the group of FMA opcodes holds intrinsic opcodes.
unsigned get132Opcode() const
Returns the 132 form of FMA opcode.
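A sketch of retrieving a sibling FMA form through the group lookup above (MI is assumed to be an FMA3 instruction):
  if (const X86InstrFMA3Group *Group =
          getFMA3Group(MI.getOpcode(), MI.getDesc().TSFlags)) {
    unsigned NewOpc = Group->get231Opcode(); // e.g. switch to the 231 form
    (void)NewOpc;
  }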
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.