#define DEBUG_TYPE "x86-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"

static cl::opt<bool>
    NoFusing("disable-spill-fusing",
             cl::desc("Disable fusing of spill code into instructions"),
             cl::Hidden);
static cl::opt<bool> PrintFailedFusing(
    "print-failed-fuse-candidates",
    cl::desc("Print instructions that the allocator wants to"
             " fuse, but the X86 backend currently can't"),
    cl::Hidden);
static cl::opt<bool> ReMatPICStubLoad(
    "remat-pic-stub-load",
    cl::desc("Re-materialize load from stub in PIC mode"),
    cl::init(false), cl::Hidden);
static cl::opt<unsigned> PartialRegUpdateClearance(
    "partial-reg-update-clearance",
    cl::desc("Clearance between two register writes "
             "for inserting XOR to avoid partial "
             "register update"),
    cl::init(64), cl::Hidden);
static cl::opt<unsigned> UndefRegClearance(
    "undef-reg-clearance",
    cl::desc("How many idle instructions we would like before "
             "certain undef register reads"),
    cl::init(128), cl::Hidden);
void X86InstrInfo::anchor() {}

X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
    : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
                                               : X86::ADJCALLSTACKDOWN32),
                      (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
                                               : X86::ADJCALLSTACKUP32),
                      X86::CATCHRET,
                      (STI.is64Bit() ? X86::RET64 : X86::RET32)),
      Subtarget(STI), RI(STI.getTargetTriple()) {}
  if (!RC || !Subtarget.hasEGPR())
    return RC;

  switch (RC->getID()) {
  default:
    return RC;
  case X86::GR8RegClassID:
    return &X86::GR8_NOREX2RegClass;
  case X86::GR16RegClassID:
    return &X86::GR16_NOREX2RegClass;
  case X86::GR32RegClassID:
    return &X86::GR32_NOREX2RegClass;
  case X86::GR64RegClassID:
    return &X86::GR64_NOREX2RegClass;
  case X86::GR32_NOSPRegClassID:
    return &X86::GR32_NOREX2_NOSPRegClass;
  case X86::GR64_NOSPRegClassID:
    return &X86::GR64_NOREX2_NOSPRegClass;
  }
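// Why the _NOREX2 mapping above exists: with APX, r16-r31 (EGPR) are only
// encodable via a REX2 or EVEX prefix. Instructions that cannot carry such a
// prefix must be kept away from those registers, so each general-purpose
// class is narrowed to its _NOREX2 counterpart before register allocation.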
bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                         Register &SrcReg, Register &DstReg,
                                         unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case X86::MOVSX16rr8:
  case X86::MOVZX16rr8:
  case X86::MOVSX32rr8:
  case X86::MOVZX32rr8:
  case X86::MOVSX64rr8:
    if (!Subtarget.is64Bit())
      // It's not always legal to reference the low 8-bit of the larger
      // register in 32-bit mode.
      return false;
    [[fallthrough]];
  case X86::MOVSX32rr16:
  case X86::MOVZX32rr16:
  case X86::MOVSX64rr16:
  case X86::MOVSX64rr32: {
    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
      // Be conservative.
      return false;
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unreachable!");
    case X86::MOVSX16rr8:
    case X86::MOVZX16rr8:
    case X86::MOVSX32rr8:
    case X86::MOVZX32rr8:
    case X86::MOVSX64rr8:
      SubIdx = X86::sub_8bit;
      break;
    case X86::MOVSX32rr16:
    case X86::MOVZX32rr16:
    case X86::MOVSX64rr16:
      SubIdx = X86::sub_16bit;
      break;
    case X86::MOVSX64rr32:
      SubIdx = X86::sub_32bit;
      break;
    }
    return true;
  }
  }
  return false;
}
  if (MI.mayLoad() || MI.mayStore())
    return false;

  if (MI.isCopyLike() || MI.isInsertSubreg())
    return true;

  unsigned Opcode = MI.getOpcode();

  // Bit scanning and counting instructions.
  if (isBSF(Opcode) || isBSR(Opcode) || isLZCNT(Opcode) || isPOPCNT(Opcode) ||
      isTZCNT(Opcode))
    return true;

  // Bit manipulation instructions are effectively combinations of basic
  // arithmetic ops.
  if (isBLCFILL(Opcode) || isBLCI(Opcode) || isBLCIC(Opcode) ||
      isBLCMSK(Opcode) || isBLCS(Opcode) || isBLSFILL(Opcode) ||
      isBLSI(Opcode) || isBLSIC(Opcode) || isBLSMSK(Opcode) || isBLSR(Opcode) ||
      isTZMSK(Opcode))
    return true;

  // Bit extracting and clearing instructions.
  if (isBEXTR(Opcode) || isBZHI(Opcode))
    return true;

  // Shifts and rotates.
  if (isROL(Opcode) || isROR(Opcode) || isSAR(Opcode) || isSHL(Opcode) ||
      isSHR(Opcode) || isSHLD(Opcode) || isSHRD(Opcode))
    return true;

  // Basic arithmetic (sets flags, but the timing is value-independent).
  if (isADC(Opcode) || isADD(Opcode) || isAND(Opcode) || isOR(Opcode) ||
      isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode))
    return true;

  // Unary arithmetic.
  if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode))
    return true;

  // Various moves used to zero or sign extend things.
  if (isMOVSX(Opcode) || isMOVZX(Opcode) || isMOVSXD(Opcode) || isMOV(Opcode))
    return true;

  // BMI2 shifts that do not touch flags.
  if (isRORX(Opcode) || isSARX(Opcode) || isSHLX(Opcode) || isSHRX(Opcode))
    return true;
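// These opcode-class checks whitelist "data invariant" instructions: ones
// whose execution time does not depend on the values of their operands.
// Passes such as speculative-load-hardening rely on this property when
// deciding what may safely execute under misspeculation.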
  switch (MI.getOpcode()) {

  case X86::IMUL64rmi32:

  case X86::POPCNT16rm:
  case X86::POPCNT32rm:
  case X86::POPCNT64rm:

  case X86::BLCFILL32rm:
  case X86::BLCFILL64rm:

  case X86::BLCMSK32rm:
  case X86::BLCMSK64rm:

  case X86::BLSFILL32rm:
  case X86::BLSFILL64rm:

  case X86::BLSMSK32rm:
  case X86::BLSMSK64rm:

  case X86::BEXTRI32mi:
  case X86::BEXTRI64mi:

  case X86::CVTTSD2SI64rm:
  case X86::VCVTTSD2SI64rm:
  case X86::VCVTTSD2SI64Zrm:
  case X86::CVTTSD2SIrm:
  case X86::VCVTTSD2SIrm:
  case X86::VCVTTSD2SIZrm:
  case X86::CVTTSS2SI64rm:
  case X86::VCVTTSS2SI64rm:
  case X86::VCVTTSS2SI64Zrm:
  case X86::CVTTSS2SIrm:
  case X86::VCVTTSS2SIrm:
  case X86::VCVTTSS2SIZrm:
  case X86::CVTSI2SDrm:
  case X86::VCVTSI2SDrm:
  case X86::VCVTSI2SDZrm:
  case X86::CVTSI2SSrm:
  case X86::VCVTSI2SSrm:
  case X86::VCVTSI2SSZrm:
  case X86::CVTSI642SDrm:
  case X86::VCVTSI642SDrm:
  case X86::VCVTSI642SDZrm:
  case X86::CVTSI642SSrm:
  case X86::VCVTSI642SSrm:
  case X86::VCVTSI642SSZrm:
  case X86::CVTSS2SDrm:
  case X86::VCVTSS2SDrm:
  case X86::VCVTSS2SDZrm:
  case X86::CVTSD2SSrm:
  case X86::VCVTSD2SSrm:
  case X86::VCVTSD2SSZrm:

  case X86::VCVTTSD2USI64Zrm:
  case X86::VCVTTSD2USIZrm:
  case X86::VCVTTSS2USI64Zrm:
  case X86::VCVTTSS2USIZrm:
  case X86::VCVTUSI2SDZrm:
  case X86::VCVTUSI642SDZrm:
  case X86::VCVTUSI2SSZrm:
  case X86::VCVTUSI642SSZrm:

  case X86::MOV8rm_NOREX:

  case X86::MOVSX16rm8:
  case X86::MOVSX32rm16:
  case X86::MOVSX32rm8:
  case X86::MOVSX32rm8_NOREX:
  case X86::MOVSX64rm16:
  case X86::MOVSX64rm32:
  case X86::MOVSX64rm8:
  case X86::MOVZX16rm8:
  case X86::MOVZX32rm16:
  case X86::MOVZX32rm8:
  case X86::MOVZX32rm8_NOREX:
  case X86::MOVZX64rm16:
  case X86::MOVZX64rm8:
  if (isFrameInstr(MI)) {
    int SPAdj = alignTo(getFrameSize(MI), TFI->getStackAlign());
    SPAdj -= getFrameAdjustment(MI);
    if (!isFrameSetup(MI))
      SPAdj = -SPAdj;
    return SPAdj;
  }

  // To know whether a call adjusts the stack, we need information that is
  // bound to the following ADJCALLSTACKUP pseudo. Look for the next one.
  if (MI.isCall()) {
    const MachineBasicBlock *MBB = MI.getParent();
    auto I = ++MachineBasicBlock::const_iterator(MI);
    for (auto E = MBB->end(); I != E; ++I) {
      if (I->getOpcode() == getCallFrameDestroyOpcode() || I->isCall())
        break;
    }

    // If we could not find a frame destroy opcode, then it has already
    // been simplified, so we don't care.
    if (I == MBB->end() || I->getOpcode() != getCallFrameDestroyOpcode())
      return 0;

    return -(I->getOperand(1).getImm());
  }

  // Currently handle only PUSHes we can reasonably expect to see in call
  // sequences.
  switch (MI.getOpcode()) {
bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
                                  int &FrameIndex) const {
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default:
    return false;

  case X86::KMOVBkm_EVEX:

  case X86::KMOVWkm_EVEX:

  case X86::VMOVSHZrm_alt:

  case X86::MOVSSrm_alt:

  case X86::VMOVSSrm_alt:

  case X86::VMOVSSZrm_alt:

  case X86::KMOVDkm_EVEX:

  case X86::MOVSDrm_alt:

  case X86::VMOVSDrm_alt:

  case X86::VMOVSDZrm_alt:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:

  case X86::KMOVQkm_EVEX:

  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPSZ128rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVUPSZ128rm_NOVLX:
  case X86::VMOVAPDZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVDQU8Z128rm:
  case X86::VMOVDQU16Z128rm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU64Z128rm:

  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU16Z256rm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU64Z256rm:

  case X86::VMOVAPSZrm:
  case X86::VMOVUPSZrm:
  case X86::VMOVAPDZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU64Zrm:
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default:
    return false;

  case X86::KMOVBmk_EVEX:

  case X86::KMOVWmk_EVEX:

  case X86::KMOVDmk_EVEX:

  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:

  case X86::KMOVQmk_EVEX:

  case X86::VMOVUPSZ128mr:
  case X86::VMOVAPSZ128mr:
  case X86::VMOVUPSZ128mr_NOVLX:
  case X86::VMOVAPSZ128mr_NOVLX:
  case X86::VMOVUPDZ128mr:
  case X86::VMOVAPDZ128mr:
  case X86::VMOVDQA32Z128mr:
  case X86::VMOVDQU32Z128mr:
  case X86::VMOVDQA64Z128mr:
  case X86::VMOVDQU64Z128mr:
  case X86::VMOVDQU8Z128mr:
  case X86::VMOVDQU16Z128mr:

  case X86::VMOVUPSYmr:
  case X86::VMOVAPSYmr:
  case X86::VMOVUPDYmr:
  case X86::VMOVAPDYmr:
  case X86::VMOVDQUYmr:
  case X86::VMOVDQAYmr:
  case X86::VMOVUPSZ256mr:
  case X86::VMOVAPSZ256mr:
  case X86::VMOVUPSZ256mr_NOVLX:
  case X86::VMOVAPSZ256mr_NOVLX:
  case X86::VMOVUPDZ256mr:
  case X86::VMOVAPDZ256mr:
  case X86::VMOVDQU8Z256mr:
  case X86::VMOVDQU16Z256mr:
  case X86::VMOVDQA32Z256mr:
  case X86::VMOVDQU32Z256mr:
  case X86::VMOVDQA64Z256mr:
  case X86::VMOVDQU64Z256mr:

  case X86::VMOVUPSZmr:
  case X86::VMOVAPSZmr:
  case X86::VMOVUPDZmr:
  case X86::VMOVAPDZmr:
  case X86::VMOVDQU8Zmr:
  case X86::VMOVDQU16Zmr:
  case X86::VMOVDQA32Zmr:
  case X86::VMOVDQU32Zmr:
  case X86::VMOVDQA64Zmr:
  case X86::VMOVDQU64Zmr:
Register X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
  unsigned Dummy;
  return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

Register X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex,
                                           unsigned &MemBytes) const {
  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
      return MI.getOperand(0).getReg();
  return 0;
}

Register X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                 int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (hasLoadFromStackSlot(MI, Accesses)) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    return MI.getOperand(0).getReg();
  }
  return 0;
}

Register X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  unsigned Dummy;
  return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
}

Register X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex,
                                          unsigned &MemBytes) const {
  if (isFrameStoreOpcode(MI.getOpcode(), MemBytes) &&
      isFrameOperand(MI, 0, FrameIndex))
    return MI.getOperand(X86::AddrNumOperands).getReg();
  return 0;
}

Register X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (hasStoreToStackSlot(MI, Accesses)) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    return MI.getOperand(X86::AddrNumOperands).getReg();
  }
  return 0;
}
static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
  // Don't waste compile time scanning use-def chains of physregs.
  if (!BaseReg.isVirtual())
    return false;
  bool isPICBase = false;
  for (const MachineInstr &DefMI : MRI.def_instructions(BaseReg)) {
    if (DefMI.getOpcode() != X86::MOVPC32r)
      return false;
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  return isPICBase;
}
  switch (MI.getOpcode()) {

  case X86::IMPLICIT_DEF:

  case X86::LOAD_STACK_GUARD:

  case X86::AVX1_SETALLONES:
  case X86::AVX2_SETALLONES:
  case X86::AVX512_128_SET0:
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0:
  case X86::AVX512_512_SETALLONES:
  case X86::AVX512_FsFLD0SD:
  case X86::AVX512_FsFLD0SH:
  case X86::AVX512_FsFLD0SS:
  case X86::AVX512_FsFLD0F128:

  case X86::FsFLD0F128:

  case X86::MOV32ImmSExti8:

  case X86::MOV64ImmSExti8:

  case X86::V_SETALLONES:

  case X86::PTILEZEROV:

  case X86::MOV8rm_NOREX:

  case X86::MOVSSrm_alt:

  case X86::MOVSDrm_alt:

  case X86::VMOVSSrm_alt:

  case X86::VMOVSDrm_alt:

  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::VBROADCASTSSrm:
  case X86::VBROADCASTSSYrm:
  case X86::VBROADCASTSDYrm:

  case X86::VPBROADCASTBZ128rm:
  case X86::VPBROADCASTBZ256rm:
  case X86::VPBROADCASTBZrm:
  case X86::VBROADCASTF32X2Z256rm:
  case X86::VBROADCASTF32X2Zrm:
  case X86::VBROADCASTI32X2Z128rm:
  case X86::VBROADCASTI32X2Z256rm:
  case X86::VBROADCASTI32X2Zrm:
  case X86::VPBROADCASTWZ128rm:
  case X86::VPBROADCASTWZ256rm:
  case X86::VPBROADCASTWZrm:
  case X86::VPBROADCASTDZ128rm:
  case X86::VPBROADCASTDZ256rm:
  case X86::VPBROADCASTDZrm:
  case X86::VBROADCASTSSZ128rm:
  case X86::VBROADCASTSSZ256rm:
  case X86::VBROADCASTSSZrm:
  case X86::VPBROADCASTQZ128rm:
  case X86::VPBROADCASTQZ256rm:
  case X86::VPBROADCASTQZrm:
  case X86::VBROADCASTSDZ256rm:
  case X86::VBROADCASTSDZrm:

  case X86::VMOVSSZrm_alt:

  case X86::VMOVSDZrm_alt:

  case X86::VMOVSHZrm_alt:
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVAPSZrm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU16Z128rm:
  case X86::VMOVDQU16Z256rm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Z128rm:
  case X86::VMOVDQU64Z256rm:
  case X86::VMOVDQU64Zrm:
  case X86::VMOVDQU8Z128rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZ128rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVUPSZ128rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVUPSZrm: {
    // Loads from constant pools are trivially rematerializable.
    if (MI.isDereferenceableInvariantLoad()) {
      Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
    if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS &&
        !MO.isDead())
      return true;

inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
                                              unsigned ShiftAmtOperandIdx) {
  // The shift count is six bits with the REX.W prefix and five bits without.
  unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
  unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
  return Imm & ShiftCountMask;
}

inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
  // LEA encodes its scale factor in the two-bit SIB.scale field, so only
  // shift amounts below 4 can be expressed.
  return ShAmt < 4 && ShAmt > 0;
}
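// Example: "SHL64ri %r, 68" behaves like a shift by 68 & 63 = 4, matching the
// hardware truncation of shift counts. Only shifts by 1..3 qualify for the
// LEA rewrite, since LEA's SIB scale can only encode factors 2, 4, and 8,
// e.g. "shl $3, %rax" can become "lea (,%rax,8), %rax".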
static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
                                   MachineInstr &CmpValDefInstr,
                                   const MachineRegisterInfo *MRI,
                                   MachineInstr **AndInstr,
                                   const TargetRegisterInfo *TRI,
                                   bool &NoSignFlag, bool &ClearsOverflowFlag) {
  if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG &&
        CmpInstr.getOpcode() == X86::TEST64rr) &&
      !(CmpValDefInstr.getOpcode() == X86::COPY &&
        CmpInstr.getOpcode() == X86::TEST16rr))
    return false;

  assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
         "CmpInstr is an analyzable TEST16rr/TEST64rr, and "
         "`X86InstrInfo::analyzeCompare` requires two reg operands are the"
         "same.");

  assert(
      (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
      "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
      "is a user of COPY sub16bit.");

  MachineInstr *VregDefInstr = nullptr;
  if (CmpInstr.getOpcode() == X86::TEST16rr) {
    VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg());
    if (!((VregDefInstr->getOpcode() == X86::AND32ri ||
           VregDefInstr->getOpcode() == X86::AND64ri32) &&
          isUInt<16>(VregDefInstr->getOperand(2).getImm())))
      return false;
  }

  if (CmpInstr.getOpcode() == X86::TEST64rr) {
    VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
  }

  assert(VregDefInstr && "Must have a definition (SSA)");

  if (X86::isAND(VregDefInstr->getOpcode())) {
    // Make sure EFLAGS is not clobbered between the AND and the TEST.
    for (MachineInstr &Instr :
         make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
                    MachineBasicBlock::iterator(CmpValDefInstr))) {
      if (Instr.modifiesRegister(X86::EFLAGS, TRI))
        return false;
    }

    *AndInstr = VregDefInstr;

    // AND clears OF in a known way.
    ClearsOverflowFlag = true;
    return true;
  }
  return false;
}
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
                                  unsigned Opc, bool AllowSP, Register &NewSrc,
                                  bool &isKill, MachineOperand &ImplicitOp,
                                  LiveVariables *LV, LiveIntervals *LIS) const {
  MachineFunction &MF = *MI.getParent()->getParent();
  const TargetRegisterClass *RC;
  if (AllowSP) {
    RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
  } else {
    RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
  }
  Register SrcReg = Src.getReg();
  isKill = MI.killsRegister(SrcReg, nullptr);

  // For both LEA64 and LEA32 the register already has essentially the right
  // type (32-bit or 64-bit); we may just need to forbid SP.
  if (Opc != X86::LEA64_32r) {
    NewSrc = SrcReg;
    assert(!Src.isUndef() && "Undef op doesn't need optimization");
    if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
      return false;
    return true;
  }

  // This is for an LEA64_32r and incoming registers are 32-bit. One way or
  // another we cannot feed them to the LEA directly.
  assert(!Src.isUndef() && "Undef op doesn't need optimization");
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                                         MachineInstr &MI,
                                                         LiveVariables *LV,
                                                         LiveIntervals *LIS,
                                                         bool Is8BitOp) const {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
  assert((Is8BitOp ||
          RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
              *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
         "Unexpected type for LEA transform");

  // 8/16-bit operations are widened and performed with a 32-bit LEA, which is
  // only done on 64-bit targets.
  if (!Subtarget.is64Bit())
    return nullptr;

  unsigned Opcode = X86::LEA64_32r;
  Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
  Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
  Register InRegLEA2;

  MachineBasicBlock::iterator MBBI = MI.getIterator();
  Register Dest = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Src2;
  bool IsDead = MI.getOperand(0).isDead();
  bool IsKill = MI.getOperand(1).isKill();
  unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
  MachineInstr *ImpDef =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
  MachineInstr *InsMI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
          .addReg(InRegLEA, RegState::Define, SubReg)
          .addReg(Src, getKillRegState(IsKill));
  MachineInstr *ImpDef2 = nullptr;
  MachineInstr *InsMI2 = nullptr;

  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
  switch (MIOpc) {
  default:
    llvm_unreachable("Unreachable!");
  case X86::SHL8ri:
  case X86::SHL16ri: {
    unsigned ShAmt = MI.getOperand(2).getImm();
    MIB.addReg(0)
        .addImm(1LL << ShAmt)
        .addReg(InRegLEA, RegState::Kill)
        .addImm(0)
        .addReg(0);
    break;
  }
  case X86::ADD8ri:
  case X86::ADD8ri_DB:
  case X86::ADD16ri:
  case X86::ADD16ri_DB:
    addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
    break;
  case X86::ADD8rr:
  case X86::ADD8rr_DB:
  case X86::ADD16rr:
  case X86::ADD16rr_DB: {
    Src2 = MI.getOperand(2).getReg();
    bool IsKill2 = MI.getOperand(2).isKill();
    assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
    if (Src == Src2) {
      // Instr is an addition of a register to itself; reuse InRegLEA.
      NewMI = addRegReg(MIB, InRegLEA, true, InRegLEA, false);
    } else {
      if (Subtarget.is64Bit())
        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
      else
        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
      // Build and insert into an implicit UNDEF value. This is OK because
      // we will be shifting and then extracting the lower 8/16-bits.
      ImpDef2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF),
                        InRegLEA2);
      InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
                   .addReg(InRegLEA2, RegState::Define, SubReg)
                   .addReg(Src2, getKillRegState(IsKill2));
      NewMI = addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
    }
    if (LV && IsKill2 && InsMI2)
      LV->replaceKillInstruction(Src2, MI, *InsMI2);
    break;
  }
  }
  // LEA cannot handle an undef source operand; bail out early.
  if (MI.getNumOperands() > 2)
    if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
      return nullptr;

  MachineFunction &MF = *MI.getParent()->getParent();
  bool Is64Bit = Subtarget.is64Bit();

  bool Is8BitOp = false;
  unsigned NumRegOperands = 2;
  unsigned MIOpc = MI.getOpcode();

  MachineInstr *NewMI = nullptr;
  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Src = MI.getOperand(1);

  switch (MIOpc) {
  default:
    return nullptr;
  case X86::SHL64ri: {
    assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt))
      return nullptr;

    // LEA can't handle RSP.
    if (Src.getReg().isVirtual() &&
        !MF.getRegInfo().constrainRegClass(Src.getReg(),
                                           &X86::GR64_NOSPRegClass))
      return nullptr;

    NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
                .add(Dest)
                .addReg(0)
                .addImm(1LL << ShAmt)
                .add(Src)
                .addImm(0)
                .addReg(0);
    break;
  }
  case X86::SHL32ri: {
    assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt))
      return nullptr;

    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    // LEA can't handle ESP.
    bool isKill;
    Register SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
                        ImplicitOp, LV, LIS))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(0)
                                  .addImm(1LL << ShAmt)
                                  .addReg(SrcReg, getKillRegState(isKill))
                                  .addImm(0)
                                  .addReg(0);
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);
    NewMI = MIB;

    // Add kills if classifyLEAReg created a new register.
    if (LV && SrcReg != Src.getReg())
      LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
    break;
  }
  case X86::SHL8ri:
    Is8BitOp = true;
    [[fallthrough]];
  case X86::SHL16ri: {
    assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt))
      return nullptr;
    return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
  }
  case X86::INC64r:
  case X86::INC32r: {
    assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
    unsigned Opc = MIOpc == X86::INC64r
                       ? X86::LEA64r
                       : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
    bool isKill;
    Register SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
                        ImplicitOp, LV, LIS))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    NewMI = addOffset(MIB, 1);

    if (LV && SrcReg != Src.getReg())
      LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
    break;
  }
  case X86::DEC64r:
  case X86::DEC32r: {
    assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
    unsigned Opc = MIOpc == X86::DEC64r
                       ? X86::LEA64r
                       : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);

    bool isKill;
    Register SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
                        ImplicitOp, LV, LIS))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    NewMI = addOffset(MIB, -1);

    if (LV && SrcReg != Src.getReg())
      LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
    break;
  }
  case X86::DEC8r:
  case X86::INC8r:
    Is8BitOp = true;
    [[fallthrough]];
  case X86::DEC16r:
  case X86::INC16r:
    return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB: {
    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    unsigned Opc;
    if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
      Opc = X86::LEA64r;
    else
      Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    const MachineOperand &Src2 = MI.getOperand(2);
    bool isKill2;
    Register SrcReg2;
    MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/false, SrcReg2, isKill2,
                        ImplicitOp2, LV, LIS))
      return nullptr;

    bool isKill;
    Register SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (Src.getReg() == Src2.getReg()) {
      // Don't classify the same register twice.
      isKill = isKill2;
      SrcReg = SrcReg2;
    } else {
      if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
                          ImplicitOp, LV, LIS))
        return nullptr;
    }

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);
    if (ImplicitOp2.getReg() != 0)
      MIB.add(ImplicitOp2);

    NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);

    if (LV) {
      if (SrcReg2 != Src2.getReg())
        LV->getVarInfo(SrcReg2).Kills.push_back(NewMI);
      if (SrcReg != SrcReg2 && SrcReg != Src.getReg())
        LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
    }
    NumRegOperands = 3;
    break;
  }
  case X86::ADD8rr:
  case X86::ADD8rr_DB:
    Is8BitOp = true;
    [[fallthrough]];
  case X86::ADD16rr:
  case X86::ADD16rr_DB:
    return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
  case X86::ADD64ri32:
  case X86::ADD64ri32_DB:
    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    NewMI = addOffset(
        BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
        MI.getOperand(2));
    break;
  case X86::ADD32ri:
  case X86::ADD32ri_DB: {
    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    bool isKill;
    Register SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
                        ImplicitOp, LV, LIS))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    NewMI = addOffset(MIB, MI.getOperand(2));

    if (LV && SrcReg != Src.getReg())
      LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
    break;
  }
  case X86::ADD8ri:
  case X86::ADD8ri_DB:
    Is8BitOp = true;
    [[fallthrough]];
  case X86::ADD16ri:
  case X86::ADD16ri_DB:
    return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
  case X86::SUB8ri:
  case X86::SUB16ri:
    // FIXME: Handle these similarly to ADD8ri/ADD16ri above.
    return nullptr;
  case X86::SUB32ri: {
    if (!MI.getOperand(2).isImm())
      return nullptr;
    int64_t Imm = MI.getOperand(2).getImm();
    if (!isInt<32>(-Imm))
      return nullptr;

    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    bool isKill;
    Register SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
                        ImplicitOp, LV, LIS))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    NewMI = addOffset(MIB, -Imm);

    if (LV && SrcReg != Src.getReg())
      LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
    break;
  }

  case X86::SUB64ri32: {
    if (!MI.getOperand(2).isImm())
      return nullptr;
    int64_t Imm = MI.getOperand(2).getImm();
    if (!isInt<32>(-Imm))
      return nullptr;

    assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");

    NewMI = addOffset(
        BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
        -Imm);
    break;
  }
  case X86::VMOVDQU8Z128rmk:
  case X86::VMOVDQU8Z256rmk:
  case X86::VMOVDQU8Zrmk:
  case X86::VMOVDQU16Z128rmk:
  case X86::VMOVDQU16Z256rmk:
  case X86::VMOVDQU16Zrmk:
  case X86::VMOVDQU32Z128rmk:
  case X86::VMOVDQA32Z128rmk:
  case X86::VMOVDQU32Z256rmk:
  case X86::VMOVDQA32Z256rmk:
  case X86::VMOVDQU32Zrmk:
  case X86::VMOVDQA32Zrmk:
  case X86::VMOVDQU64Z128rmk:
  case X86::VMOVDQA64Z128rmk:
  case X86::VMOVDQU64Z256rmk:
  case X86::VMOVDQA64Z256rmk:
  case X86::VMOVDQU64Zrmk:
  case X86::VMOVDQA64Zrmk:
  case X86::VMOVUPDZ128rmk:
  case X86::VMOVAPDZ128rmk:
  case X86::VMOVUPDZ256rmk:
  case X86::VMOVAPDZ256rmk:
  case X86::VMOVUPDZrmk:
  case X86::VMOVAPDZrmk:
  case X86::VMOVUPSZ128rmk:
  case X86::VMOVAPSZ128rmk:
  case X86::VMOVUPSZ256rmk:
  case X86::VMOVAPSZ256rmk:
  case X86::VMOVUPSZrmk:
  case X86::VMOVAPSZrmk:
  case X86::VBROADCASTSDZ256rmk:
  case X86::VBROADCASTSDZrmk:
  case X86::VBROADCASTSSZ128rmk:
  case X86::VBROADCASTSSZ256rmk:
  case X86::VBROADCASTSSZrmk:
  case X86::VPBROADCASTDZ128rmk:
  case X86::VPBROADCASTDZ256rmk:
  case X86::VPBROADCASTDZrmk:
  case X86::VPBROADCASTQZ128rmk:
  case X86::VPBROADCASTQZ256rmk:
  case X86::VPBROADCASTQZrmk: {
    unsigned Opc;
    switch (MIOpc) {
    default:
      llvm_unreachable("Unreachable!");
    case X86::VMOVDQU8Z128rmk:
      Opc = X86::VPBLENDMBZ128rmk;
      break;
    case X86::VMOVDQU8Z256rmk:
      Opc = X86::VPBLENDMBZ256rmk;
      break;
    case X86::VMOVDQU8Zrmk:
      Opc = X86::VPBLENDMBZrmk;
      break;
    case X86::VMOVDQU16Z128rmk:
      Opc = X86::VPBLENDMWZ128rmk;
      break;
    case X86::VMOVDQU16Z256rmk:
      Opc = X86::VPBLENDMWZ256rmk;
      break;
    case X86::VMOVDQU16Zrmk:
      Opc = X86::VPBLENDMWZrmk;
      break;
    case X86::VMOVDQU32Z128rmk:
      Opc = X86::VPBLENDMDZ128rmk;
      break;
    case X86::VMOVDQU32Z256rmk:
      Opc = X86::VPBLENDMDZ256rmk;
      break;
    case X86::VMOVDQU32Zrmk:
      Opc = X86::VPBLENDMDZrmk;
      break;
    case X86::VMOVDQU64Z128rmk:
      Opc = X86::VPBLENDMQZ128rmk;
      break;
    case X86::VMOVDQU64Z256rmk:
      Opc = X86::VPBLENDMQZ256rmk;
      break;
    case X86::VMOVDQU64Zrmk:
      Opc = X86::VPBLENDMQZrmk;
      break;
    case X86::VMOVUPDZ128rmk:
      Opc = X86::VBLENDMPDZ128rmk;
      break;
    case X86::VMOVUPDZ256rmk:
      Opc = X86::VBLENDMPDZ256rmk;
      break;
    case X86::VMOVUPDZrmk:
      Opc = X86::VBLENDMPDZrmk;
      break;
    case X86::VMOVUPSZ128rmk:
      Opc = X86::VBLENDMPSZ128rmk;
      break;
    case X86::VMOVUPSZ256rmk:
      Opc = X86::VBLENDMPSZ256rmk;
      break;
    case X86::VMOVUPSZrmk:
      Opc = X86::VBLENDMPSZrmk;
      break;
    case X86::VMOVDQA32Z128rmk:
      Opc = X86::VPBLENDMDZ128rmk;
      break;
    case X86::VMOVDQA32Z256rmk:
      Opc = X86::VPBLENDMDZ256rmk;
      break;
    case X86::VMOVDQA32Zrmk:
      Opc = X86::VPBLENDMDZrmk;
      break;
    case X86::VMOVDQA64Z128rmk:
      Opc = X86::VPBLENDMQZ128rmk;
      break;
    case X86::VMOVDQA64Z256rmk:
      Opc = X86::VPBLENDMQZ256rmk;
      break;
    case X86::VMOVDQA64Zrmk:
      Opc = X86::VPBLENDMQZrmk;
      break;
    case X86::VMOVAPDZ128rmk:
      Opc = X86::VBLENDMPDZ128rmk;
      break;
    case X86::VMOVAPDZ256rmk:
      Opc = X86::VBLENDMPDZ256rmk;
      break;
    case X86::VMOVAPDZrmk:
      Opc = X86::VBLENDMPDZrmk;
      break;
    case X86::VMOVAPSZ128rmk:
      Opc = X86::VBLENDMPSZ128rmk;
      break;
    case X86::VMOVAPSZ256rmk:
      Opc = X86::VBLENDMPSZ256rmk;
      break;
    case X86::VMOVAPSZrmk:
      Opc = X86::VBLENDMPSZrmk;
      break;
    case X86::VBROADCASTSDZ256rmk:
      Opc = X86::VBLENDMPDZ256rmbk;
      break;
    case X86::VBROADCASTSDZrmk:
      Opc = X86::VBLENDMPDZrmbk;
      break;
    case X86::VBROADCASTSSZ128rmk:
      Opc = X86::VBLENDMPSZ128rmbk;
      break;
    case X86::VBROADCASTSSZ256rmk:
      Opc = X86::VBLENDMPSZ256rmbk;
      break;
    case X86::VBROADCASTSSZrmk:
      Opc = X86::VBLENDMPSZrmbk;
      break;
    case X86::VPBROADCASTDZ128rmk:
      Opc = X86::VPBLENDMDZ128rmbk;
      break;
    case X86::VPBROADCASTDZ256rmk:
      Opc = X86::VPBLENDMDZ256rmbk;
      break;
    case X86::VPBROADCASTDZrmk:
      Opc = X86::VPBLENDMDZrmbk;
      break;
    case X86::VPBROADCASTQZ128rmk:
      Opc = X86::VPBLENDMQZ128rmbk;
      break;
    case X86::VPBROADCASTQZ256rmk:
      Opc = X86::VPBLENDMQZ256rmbk;
      break;
    case X86::VPBROADCASTQZrmk:
      Opc = X86::VPBLENDMQZrmbk;
      break;
    }

    NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                .add(Dest)
                .add(MI.getOperand(2))
                .add(Src)
                .add(MI.getOperand(3))
                .add(MI.getOperand(4))
                .add(MI.getOperand(5))
                .add(MI.getOperand(6))
                .add(MI.getOperand(7));
    NumRegOperands = 4;
    break;
  }
  case X86::VMOVDQU8Z128rrk:
  case X86::VMOVDQU8Z256rrk:
  case X86::VMOVDQU8Zrrk:
  case X86::VMOVDQU16Z128rrk:
  case X86::VMOVDQU16Z256rrk:
  case X86::VMOVDQU16Zrrk:
  case X86::VMOVDQU32Z128rrk:
  case X86::VMOVDQA32Z128rrk:
  case X86::VMOVDQU32Z256rrk:
  case X86::VMOVDQA32Z256rrk:
  case X86::VMOVDQU32Zrrk:
  case X86::VMOVDQA32Zrrk:
  case X86::VMOVDQU64Z128rrk:
  case X86::VMOVDQA64Z128rrk:
  case X86::VMOVDQU64Z256rrk:
  case X86::VMOVDQA64Z256rrk:
  case X86::VMOVDQU64Zrrk:
  case X86::VMOVDQA64Zrrk:
  case X86::VMOVUPDZ128rrk:
  case X86::VMOVAPDZ128rrk:
  case X86::VMOVUPDZ256rrk:
  case X86::VMOVAPDZ256rrk:
  case X86::VMOVUPDZrrk:
  case X86::VMOVAPDZrrk:
  case X86::VMOVUPSZ128rrk:
  case X86::VMOVAPSZ128rrk:
  case X86::VMOVUPSZ256rrk:
  case X86::VMOVAPSZ256rrk:
  case X86::VMOVUPSZrrk:
  case X86::VMOVAPSZrrk: {
    unsigned Opc;
    switch (MIOpc) {
    default:
      llvm_unreachable("Unreachable!");
    case X86::VMOVDQU8Z128rrk:
      Opc = X86::VPBLENDMBZ128rrk;
      break;
    case X86::VMOVDQU8Z256rrk:
      Opc = X86::VPBLENDMBZ256rrk;
      break;
    case X86::VMOVDQU8Zrrk:
      Opc = X86::VPBLENDMBZrrk;
      break;
    case X86::VMOVDQU16Z128rrk:
      Opc = X86::VPBLENDMWZ128rrk;
      break;
    case X86::VMOVDQU16Z256rrk:
      Opc = X86::VPBLENDMWZ256rrk;
      break;
    case X86::VMOVDQU16Zrrk:
      Opc = X86::VPBLENDMWZrrk;
      break;
    case X86::VMOVDQU32Z128rrk:
      Opc = X86::VPBLENDMDZ128rrk;
      break;
    case X86::VMOVDQU32Z256rrk:
      Opc = X86::VPBLENDMDZ256rrk;
      break;
    case X86::VMOVDQU32Zrrk:
      Opc = X86::VPBLENDMDZrrk;
      break;
    case X86::VMOVDQU64Z128rrk:
      Opc = X86::VPBLENDMQZ128rrk;
      break;
    case X86::VMOVDQU64Z256rrk:
      Opc = X86::VPBLENDMQZ256rrk;
      break;
    case X86::VMOVDQU64Zrrk:
      Opc = X86::VPBLENDMQZrrk;
      break;
    case X86::VMOVUPDZ128rrk:
      Opc = X86::VBLENDMPDZ128rrk;
      break;
    case X86::VMOVUPDZ256rrk:
      Opc = X86::VBLENDMPDZ256rrk;
      break;
    case X86::VMOVUPDZrrk:
      Opc = X86::VBLENDMPDZrrk;
      break;
    case X86::VMOVUPSZ128rrk:
      Opc = X86::VBLENDMPSZ128rrk;
      break;
    case X86::VMOVUPSZ256rrk:
      Opc = X86::VBLENDMPSZ256rrk;
      break;
    case X86::VMOVUPSZrrk:
      Opc = X86::VBLENDMPSZrrk;
      break;
    case X86::VMOVDQA32Z128rrk:
      Opc = X86::VPBLENDMDZ128rrk;
      break;
    case X86::VMOVDQA32Z256rrk:
      Opc = X86::VPBLENDMDZ256rrk;
      break;
    case X86::VMOVDQA32Zrrk:
      Opc = X86::VPBLENDMDZrrk;
      break;
    case X86::VMOVDQA64Z128rrk:
      Opc = X86::VPBLENDMQZ128rrk;
      break;
    case X86::VMOVDQA64Z256rrk:
      Opc = X86::VPBLENDMQZ256rrk;
      break;
    case X86::VMOVDQA64Zrrk:
      Opc = X86::VPBLENDMQZrrk;
      break;
    case X86::VMOVAPDZ128rrk:
      Opc = X86::VBLENDMPDZ128rrk;
      break;
    case X86::VMOVAPDZ256rrk:
      Opc = X86::VBLENDMPDZ256rrk;
      break;
    case X86::VMOVAPDZrrk:
      Opc = X86::VBLENDMPDZrrk;
      break;
    case X86::VMOVAPSZ128rrk:
      Opc = X86::VBLENDMPSZ128rrk;
      break;
    case X86::VMOVAPSZ256rrk:
      Opc = X86::VBLENDMPSZ256rrk;
      break;
    case X86::VMOVAPSZrrk:
      Opc = X86::VBLENDMPSZrrk;
      break;
    }

    NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                .add(Dest)
                .add(MI.getOperand(2))
                .add(Src)
                .add(MI.getOperand(3));
    NumRegOperands = 4;
    break;
  }
  }
  for (unsigned I = 0; I < NumRegOperands; ++I) {
    MachineOperand &Op = MI.getOperand(I);
    if (Op.isReg() && (Op.isDead() || Op.isKill()))
      LV->replaceKillInstruction(Op.getReg(), MI, *NewMI);
  }
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
                                       unsigned SrcOpIdx2) {
  // Put the lowest index to SrcOpIdx1 to simplify the checks below.
  if (SrcOpIdx1 > SrcOpIdx2)
    std::swap(SrcOpIdx1, SrcOpIdx2);

  unsigned Op1 = 1, Op2 = 2, Op3 = 3;
  if (X86II::isKMasked(TSFlags)) {
    Op2++;
    Op3++;
  }

  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
    return 0;
  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
    return 1;
  if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
    return 2;
  llvm_unreachable("Unknown three src commute case.");
}

unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
    const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
    const X86InstrFMA3Group &FMA3Group) const {
  unsigned Opc = MI.getOpcode();

  // Commuting the first operand of an FMA intrinsic is only legal if all
  // users read just the lowest element, which we do not analyze here.
  assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
         "Intrinsic instructions can't commute operand 1");

  // Determine which case this commute is or if it can't be done.
  unsigned Case =
      getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
  assert(Case < 3 && "Unexpected case number!");

  // The mapping from the input FMA form to the output FMA form that
  // preserves the operation semantics after commuting the operands.
  const unsigned Form132Index = 0;
  const unsigned Form213Index = 1;
  const unsigned Form231Index = 2;
  static const unsigned FormMapping[][3] = {
      // Case 0: commute operands 1 and 2.
      {Form231Index, Form213Index, Form132Index},
      // Case 1: commute operands 1 and 3.
      {Form132Index, Form231Index, Form213Index},
      // Case 2: commute operands 2 and 3.
      {Form213Index, Form132Index, Form231Index}};

  unsigned FMAForms[3];
  FMAForms[0] = FMA3Group.get132Opcode();
  FMAForms[1] = FMA3Group.get213Opcode();
  FMAForms[2] = FMA3Group.get231Opcode();

  // Everything is ready, just adjust the FMA opcode and return it.
  for (unsigned FormIndex = 0; FormIndex < 3; FormIndex++)
    if (Opc == FMAForms[FormIndex])
      return FMAForms[FormMapping[Case][FormIndex]];
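// Worked example for FormMapping: VFMADD132* computes src1 * src3 + src2.
// Commuting operands 2 and 3 (Case 2) gives src1 * src2 + src3, which by
// commutativity of the multiply is exactly the 213 form
// (src2 * src1 + src3) - hence the 132 -> 213 entry in the Case-2 row.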
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
                             unsigned SrcOpIdx2) {
  // Determine which case this commute is: (1,2), (1,3), or (2,3).
  unsigned Case =
      getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
  assert(Case < 3 && "Unexpected case value!");

  // For each case, SwapMasks[Case] gives the pairs of imm8 bits that must be
  // exchanged with one another.
  static const uint8_t SwapMasks[3][4] = {
      {0x04, 0x10, 0x08, 0x20}, // Swap bits 2/4 and 3/5.
      {0x02, 0x10, 0x08, 0x40}, // Swap bits 1/4 and 3/6.
      {0x02, 0x04, 0x20, 0x40}, // Swap bits 1/2 and 5/6.
  };

  uint8_t Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
  // Clear out the bits that are swapped, then rebuild them from the pairs.
  uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
                           SwapMasks[Case][2] | SwapMasks[Case][3]);
  if (Imm & SwapMasks[Case][0])
    NewImm |= SwapMasks[Case][1];
  if (Imm & SwapMasks[Case][1])
    NewImm |= SwapMasks[Case][0];
  if (Imm & SwapMasks[Case][2])
    NewImm |= SwapMasks[Case][3];
  if (Imm & SwapMasks[Case][3])
    NewImm |= SwapMasks[Case][2];
  MI.getOperand(MI.getNumOperands() - 1).setImm(NewImm);
}
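// Rationale for SwapMasks: the VPTERNLOG imm8 is a truth table indexed by
// the input bit triple (src1, src2, src3). Swapping two sources permutes the
// table entries whose indices differ in exactly those two input bits:
// swapping src1/src2 exchanges entries 0b010 <-> 0b100 (0x04/0x10) and
// 0b011 <-> 0b101 (0x08/0x20), which is precisely SwapMasks[0]; the other
// two rows follow the same pattern for src1/src3 and src2/src3.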
#define VPERM_CASES(Suffix)                                                    \
  case X86::VPERMI2##Suffix##Z128rr:                                           \
  case X86::VPERMT2##Suffix##Z128rr:                                           \
  case X86::VPERMI2##Suffix##Z256rr:                                           \
  case X86::VPERMT2##Suffix##Z256rr:                                           \
  case X86::VPERMI2##Suffix##Zrr:                                              \
  case X86::VPERMT2##Suffix##Zrr:                                              \
  case X86::VPERMI2##Suffix##Z128rm:                                           \
  case X86::VPERMT2##Suffix##Z128rm:                                           \
  case X86::VPERMI2##Suffix##Z256rm:                                           \
  case X86::VPERMT2##Suffix##Z256rm:                                           \
  case X86::VPERMI2##Suffix##Zrm:                                              \
  case X86::VPERMT2##Suffix##Zrm:                                              \
  case X86::VPERMI2##Suffix##Z128rrkz:                                         \
  case X86::VPERMT2##Suffix##Z128rrkz:                                         \
  case X86::VPERMI2##Suffix##Z256rrkz:                                         \
  case X86::VPERMT2##Suffix##Z256rrkz:                                         \
  case X86::VPERMI2##Suffix##Zrrkz:                                            \
  case X86::VPERMT2##Suffix##Zrrkz:                                            \
  case X86::VPERMI2##Suffix##Z128rmkz:                                         \
  case X86::VPERMT2##Suffix##Z128rmkz:                                         \
  case X86::VPERMI2##Suffix##Z256rmkz:                                         \
  case X86::VPERMT2##Suffix##Z256rmkz:                                         \
  case X86::VPERMI2##Suffix##Zrmkz:                                            \
  case X86::VPERMT2##Suffix##Zrmkz:

#define VPERM_CASES_BROADCAST(Suffix)                                          \
  VPERM_CASES(Suffix)                                                          \
  case X86::VPERMI2##Suffix##Z128rmb:                                          \
  case X86::VPERMT2##Suffix##Z128rmb:                                          \
  case X86::VPERMI2##Suffix##Z256rmb:                                          \
  case X86::VPERMT2##Suffix##Z256rmb:                                          \
  case X86::VPERMI2##Suffix##Zrmb:                                             \
  case X86::VPERMT2##Suffix##Zrmb:                                             \
  case X86::VPERMI2##Suffix##Z128rmbkz:                                        \
  case X86::VPERMT2##Suffix##Z128rmbkz:                                        \
  case X86::VPERMI2##Suffix##Z256rmbkz:                                        \
  case X86::VPERMT2##Suffix##Z256rmbkz:                                        \
  case X86::VPERMI2##Suffix##Zrmbkz:                                           \
  case X86::VPERMT2##Suffix##Zrmbkz:

#undef VPERM_CASES_BROADCAST

#define VPERM_CASES(Orig, New)                                                 \
  case X86::Orig##Z128rr:                                                      \
    return X86::New##Z128rr;                                                   \
  case X86::Orig##Z128rrkz:                                                    \
    return X86::New##Z128rrkz;                                                 \
  case X86::Orig##Z128rm:                                                      \
    return X86::New##Z128rm;                                                   \
  case X86::Orig##Z128rmkz:                                                    \
    return X86::New##Z128rmkz;                                                 \
  case X86::Orig##Z256rr:                                                      \
    return X86::New##Z256rr;                                                   \
  case X86::Orig##Z256rrkz:                                                    \
    return X86::New##Z256rrkz;                                                 \
  case X86::Orig##Z256rm:                                                      \
    return X86::New##Z256rm;                                                   \
  case X86::Orig##Z256rmkz:                                                    \
    return X86::New##Z256rmkz;                                                 \
  case X86::Orig##Zrr:                                                         \
    return X86::New##Zrr;                                                      \
  case X86::Orig##Zrrkz:                                                       \
    return X86::New##Zrrkz;                                                    \
  case X86::Orig##Zrm:                                                         \
    return X86::New##Zrm;                                                      \
  case X86::Orig##Zrmkz:                                                       \
    return X86::New##Zrmkz;

#define VPERM_CASES_BROADCAST(Orig, New)                                       \
  VPERM_CASES(Orig, New)                                                       \
  case X86::Orig##Z128rmb:                                                     \
    return X86::New##Z128rmb;                                                  \
  case X86::Orig##Z128rmbkz:                                                   \
    return X86::New##Z128rmbkz;                                                \
  case X86::Orig##Z256rmb:                                                     \
    return X86::New##Z256rmb;                                                  \
  case X86::Orig##Z256rmbkz:                                                   \
    return X86::New##Z256rmbkz;                                                \
  case X86::Orig##Zrmb:                                                        \
    return X86::New##Zrmb;                                                     \
  case X86::Orig##Zrmbkz:                                                      \
    return X86::New##Zrmbkz;

#undef VPERM_CASES_BROADCAST
MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  auto CloneIfNew = [&](MachineInstr &MI) {
    return std::exchange(NewMI, false)
               ? MI.getParent()->getParent()->CloneMachineInstr(&MI)
               : &MI;
  };
  MachineInstr *WorkingMI = nullptr;
  unsigned Opc = MI.getOpcode();

#define CASE_ND(OP)                                                            \
  case X86::OP:                                                                \
  case X86::OP##_ND:

  switch (Opc) {
    // Size-changing pairs (e.g. SHRD/SHLD) swap opcodes, including the _ND
    // (new data destination) variants.
#define FROM_TO_SIZE(A, B, S)                                                  \
  {                                                                            \
    if (Opc == X86::A) {                                                       \
      Opc = X86::B;                                                            \
    } else if (Opc == X86::A##_ND) {                                           \
      Opc = X86::B##_ND;                                                       \
    } else if (Opc == X86::B) {                                                \
      Opc = X86::A;                                                            \
    } else if (Opc == X86::B##_ND) {                                           \
      Opc = X86::A##_ND;                                                       \
    }                                                                          \
  }

    WorkingMI = CloneIfNew(MI);
    WorkingMI->setDesc(get(Opc));
    break;
  case X86::PFSUBrr:
  case X86::PFSUBRrr:
    // PFSUB  x, y: x = x - y
    // PFSUBR x, y: x = y - x
    WorkingMI = CloneIfNew(MI);
    WorkingMI->setDesc(
        get(X86::PFSUBRrr == Opc ? X86::PFSUBrr : X86::PFSUBRrr));
    break;
  case X86::BLENDPDrri:
  case X86::BLENDPSrri:
  case X86::VBLENDPDrri:
  case X86::VBLENDPSrri:
    // If we're optimizing for size, try to use MOVSD/MOVSS.
    if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
      unsigned Mask =
          (Opc == X86::BLENDPDrri || Opc == X86::VBLENDPDrri) ? 0x03 : 0x0F;
      if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
    Opc = X86::TO;                                                             \
    break;
        switch (Opc) {
        default:
          llvm_unreachable("Unreachable!");
          FROM_TO(BLENDPDrri, MOVSDrr)
          FROM_TO(BLENDPSrri, MOVSSrr)
          FROM_TO(VBLENDPDrri, VMOVSDrr)
          FROM_TO(VBLENDPSrri, VMOVSSrr)
        }
        WorkingMI = CloneIfNew(MI);
        WorkingMI->setDesc(get(Opc));
        WorkingMI->removeOperand(3);
        break;
      }
    }
    [[fallthrough]];
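// Why the rewrite is sound (two-lane PD case): Imm ^ 0x03 == 1 means
// Imm == 0x02, i.e. blend(a, b) = [a0, b1]. With the operands exchanged by
// the commute, MOVSD(b, a) = [a0, b1] yields the same value in a shorter
// encoding - which is why this path is gated on hasOptSize().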
  case X86::PBLENDWrri:
  case X86::VBLENDPDYrri:
  case X86::VBLENDPSYrri:
  case X86::VPBLENDDrri:
  case X86::VPBLENDWrri:
  case X86::VPBLENDDYrri:
  case X86::VPBLENDWYrri: {
    int8_t Mask;
    switch (Opc) {
    default:
      llvm_unreachable("Unreachable!");
    case X86::BLENDPDrri:
      Mask = (int8_t)0x03;
      break;
    case X86::BLENDPSrri:
      Mask = (int8_t)0x0F;
      break;
    case X86::PBLENDWrri:
      Mask = (int8_t)0xFF;
      break;
    case X86::VBLENDPDrri:
      Mask = (int8_t)0x03;
      break;
    case X86::VBLENDPSrri:
      Mask = (int8_t)0x0F;
      break;
    case X86::VBLENDPDYrri:
      Mask = (int8_t)0x0F;
      break;
    case X86::VBLENDPSYrri:
      Mask = (int8_t)0xFF;
      break;
    case X86::VPBLENDDrri:
      Mask = (int8_t)0x0F;
      break;
    case X86::VPBLENDWrri:
      Mask = (int8_t)0xFF;
      break;
    case X86::VPBLENDDYrri:
      Mask = (int8_t)0xFF;
      break;
    case X86::VPBLENDWYrri:
      Mask = (int8_t)0xFF;
      break;
    }
    // Only the least significant bits of Imm are used; int8_t ensures the
    // value is sign extended to the int64_t that setImm takes.
    int8_t Imm = MI.getOperand(3).getImm() & Mask;
    WorkingMI = CloneIfNew(MI);
    WorkingMI->getOperand(3).setImm(Mask ^ Imm);
    break;
  }
  case X86::INSERTPSrr:
  case X86::VINSERTPSrr:
  case X86::VINSERTPSZrr: {
    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
    unsigned ZMask = Imm & 15;
    unsigned DstIdx = (Imm >> 4) & 3;
    unsigned SrcIdx = (Imm >> 6) & 3;

    // We can commute insertps if we zero 2 of the elements, the insertion is
    // "inline" and we don't override the insertion with a zero.
    if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
        llvm::popcount(ZMask) == 2) {
      unsigned AltIdx = llvm::countr_zero((ZMask | (1 << DstIdx)) ^ 15);
      assert(AltIdx < 4 && "Illegal insertion index");
      unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
      WorkingMI = CloneIfNew(MI);
      WorkingMI->getOperand(MI.getNumOperands() - 1).setImm(AltImm);
      break;
    }
    return nullptr;
  }
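// INSERTPS imm8 layout: bits [7:6] select the source element, bits [5:4] the
// destination slot, bits [3:0] zero lanes. Worked example: Imm = 0x0C inserts
// src2[0] into lane 0 and zeroes lanes 2-3, so only lane 1 survives from
// src1. After the commute, AltIdx = 1 and AltImm = 0x5C: insert the (new)
// src2[1] into lane 1, zero lanes 2-3, and lane 0 flows from the other
// operand - the same four output lanes.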
  case X86::MOVSDrr:
  case X86::MOVSSrr:
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
    if (Subtarget.hasSSE41()) {
      unsigned Mask;
      switch (Opc) {
      default:
        llvm_unreachable("Unreachable!");
      case X86::MOVSDrr:
        Opc = X86::BLENDPDrri;
        Mask = 0x02;
        break;
      case X86::MOVSSrr:
        Opc = X86::BLENDPSrri;
        Mask = 0x0E;
        break;
      case X86::VMOVSDrr:
        Opc = X86::VBLENDPDrri;
        Mask = 0x02;
        break;
      case X86::VMOVSSrr:
        Opc = X86::VBLENDPSrri;
        Mask = 0x0E;
        break;
      }

      WorkingMI = CloneIfNew(MI);
      WorkingMI->setDesc(get(Opc));
      WorkingMI->addOperand(MachineOperand::CreateImm(Mask));
      break;
    }

    WorkingMI = CloneIfNew(MI);
    WorkingMI->setDesc(get(X86::SHUFPDrri));
    WorkingMI->addOperand(MachineOperand::CreateImm(0x02));
    break;
  }
  case X86::SHUFPDrri: {
    // Commute to MOVSD.
    assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
    WorkingMI = CloneIfNew(MI);
    WorkingMI->setDesc(get(X86::MOVSDrr));
    WorkingMI->removeOperand(3);
    break;
  }
  case X86::PCLMULQDQrri:
  case X86::VPCLMULQDQrri:
  case X86::VPCLMULQDQYrri:
  case X86::VPCLMULQDQZrri:
  case X86::VPCLMULQDQZ128rri:
  case X86::VPCLMULQDQZ256rri: {
    // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
    // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned Src1Hi = Imm & 0x01;
    unsigned Src2Hi = Imm & 0x10;
    WorkingMI = CloneIfNew(MI);
    WorkingMI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
    break;
  }
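// Example: commuting the sources simply exchanges the two selector bits, so
// Imm = 0x01 (src1 high x src2 low) becomes 0x10 (src1 low x src2 high),
// while 0x00 and 0x11 are fixed points of the swap.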
  case X86::VPCMPBZ128rri:
  case X86::VPCMPUBZ128rri:
  case X86::VPCMPBZ256rri:
  case X86::VPCMPUBZ256rri:
  case X86::VPCMPBZrri:
  case X86::VPCMPUBZrri:
  case X86::VPCMPDZ128rri:
  case X86::VPCMPUDZ128rri:
  case X86::VPCMPDZ256rri:
  case X86::VPCMPUDZ256rri:
  case X86::VPCMPDZrri:
  case X86::VPCMPUDZrri:
  case X86::VPCMPQZ128rri:
  case X86::VPCMPUQZ128rri:
  case X86::VPCMPQZ256rri:
  case X86::VPCMPUQZ256rri:
  case X86::VPCMPQZrri:
  case X86::VPCMPUQZrri:
  case X86::VPCMPWZ128rri:
  case X86::VPCMPUWZ128rri:
  case X86::VPCMPWZ256rri:
  case X86::VPCMPUWZ256rri:
  case X86::VPCMPWZrri:
  case X86::VPCMPUWZrri:
  case X86::VPCMPBZ128rrik:
  case X86::VPCMPUBZ128rrik:
  case X86::VPCMPBZ256rrik:
  case X86::VPCMPUBZ256rrik:
  case X86::VPCMPBZrrik:
  case X86::VPCMPUBZrrik:
  case X86::VPCMPDZ128rrik:
  case X86::VPCMPUDZ128rrik:
  case X86::VPCMPDZ256rrik:
  case X86::VPCMPUDZ256rrik:
  case X86::VPCMPDZrrik:
  case X86::VPCMPUDZrrik:
  case X86::VPCMPQZ128rrik:
  case X86::VPCMPUQZ128rrik:
  case X86::VPCMPQZ256rrik:
  case X86::VPCMPUQZ256rrik:
  case X86::VPCMPQZrrik:
  case X86::VPCMPUQZrrik:
  case X86::VPCMPWZ128rrik:
  case X86::VPCMPUWZ128rrik:
  case X86::VPCMPWZ256rrik:
  case X86::VPCMPUWZ256rrik:
  case X86::VPCMPWZrrik:
  case X86::VPCMPUWZrrik:
    WorkingMI = CloneIfNew(MI);
    // Flip comparison mode immediate (if necessary).
    WorkingMI->getOperand(MI.getNumOperands() - 1)
        .setImm(X86::getSwappedVPCMPImm(
            MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7));
    break;
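// The swapped-predicate mapping on the low three immediate bits: with the
// operands exchanged, LT (1) <-> NLE (6) and LE (2) <-> NLT (5), while
// EQ (0), FALSE (3), NE (4), and TRUE (7) are symmetric and stay unchanged -
// that is what X86::getSwappedVPCMPImm computes.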
  case X86::VPCOMBri:
  case X86::VPCOMUBri:
  case X86::VPCOMDri:
  case X86::VPCOMUDri:
  case X86::VPCOMQri:
  case X86::VPCOMUQri:
  case X86::VPCOMWri:
  case X86::VPCOMUWri:
    WorkingMI = CloneIfNew(MI);
    // Flip comparison mode immediate (if necessary).
    WorkingMI->getOperand(3).setImm(
        X86::getSwappedVPCOMImm(MI.getOperand(3).getImm() & 0x7));
    break;
  case X86::VCMPSDZrri:
  case X86::VCMPSSZrri:
  case X86::VCMPPDZrri:
  case X86::VCMPPSZrri:
  case X86::VCMPSHZrri:
  case X86::VCMPPHZrri:
  case X86::VCMPPHZ128rri:
  case X86::VCMPPHZ256rri:
  case X86::VCMPPDZ128rri:
  case X86::VCMPPSZ128rri:
  case X86::VCMPPDZ256rri:
  case X86::VCMPPSZ256rri:
  case X86::VCMPPDZrrik:
  case X86::VCMPPSZrrik:
  case X86::VCMPPDZ128rrik:
  case X86::VCMPPSZ128rrik:
  case X86::VCMPPDZ256rrik:
  case X86::VCMPPSZ256rrik:
    WorkingMI = CloneIfNew(MI);
    WorkingMI->getOperand(MI.getNumExplicitOperands() - 1)
        .setImm(X86::getSwappedVCMPImm(
            MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f));
    break;
  case X86::VPERM2F128rr:
  case X86::VPERM2I128rr:
    // Flip permute source immediate.
    // Imm & 0x02: lo = if set, select Op1, otherwise Op0.
    // Imm & 0x20: hi = if set, select Op1, otherwise Op0.
    WorkingMI = CloneIfNew(MI);
    WorkingMI->getOperand(3).setImm((MI.getOperand(3).getImm() & 0xFF) ^ 0x22);
    break;
  case X86::MOVHLPSrr:
  case X86::UNPCKHPDrr:
  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
  case X86::VMOVHLPSZrr:
  case X86::VUNPCKHPDZ128rr:
    assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");
    switch (Opc) {
    default:
      llvm_unreachable("Unreachable!");
    case X86::MOVHLPSrr:
      Opc = X86::UNPCKHPDrr;
      break;
    case X86::UNPCKHPDrr:
      Opc = X86::MOVHLPSrr;
      break;
    case X86::VMOVHLPSrr:
      Opc = X86::VUNPCKHPDrr;
      break;
    case X86::VUNPCKHPDrr:
      Opc = X86::VMOVHLPSrr;
      break;
    case X86::VMOVHLPSZrr:
      Opc = X86::VUNPCKHPDZ128rr;
      break;
    case X86::VUNPCKHPDZ128rr:
      Opc = X86::VMOVHLPSZrr;
      break;
    }
    WorkingMI = CloneIfNew(MI);
    WorkingMI->setDesc(get(Opc));
    break;
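// Equivalence used above: MOVHLPS(a, b) = [b1, a1] while
// UNPCKHPD(a, b) = [a1, b1], so exchanging the two source operands of one
// instruction reproduces the other. Commuting therefore reduces to an opcode
// swap; both forms require SSE2.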
2644 WorkingMI = CloneIfNew(
MI);
2645 unsigned OpNo =
MI.getDesc().getNumOperands() - 1;
  case X86::VPTERNLOGDZrri:
  case X86::VPTERNLOGDZrmi:
  case X86::VPTERNLOGDZ128rri:
  case X86::VPTERNLOGDZ128rmi:
  case X86::VPTERNLOGDZ256rri:
  case X86::VPTERNLOGDZ256rmi:
  case X86::VPTERNLOGQZrri:
  case X86::VPTERNLOGQZrmi:
  case X86::VPTERNLOGQZ128rri:
  case X86::VPTERNLOGQZ128rmi:
  case X86::VPTERNLOGQZ256rri:
  case X86::VPTERNLOGQZ256rmi:
  case X86::VPTERNLOGDZrrik:
  case X86::VPTERNLOGDZ128rrik:
  case X86::VPTERNLOGDZ256rrik:
  case X86::VPTERNLOGQZrrik:
  case X86::VPTERNLOGQZ128rrik:
  case X86::VPTERNLOGQZ256rrik:
  case X86::VPTERNLOGDZrrikz:
  case X86::VPTERNLOGDZrmikz:
  case X86::VPTERNLOGDZ128rrikz:
  case X86::VPTERNLOGDZ128rmikz:
  case X86::VPTERNLOGDZ256rrikz:
  case X86::VPTERNLOGDZ256rmikz:
  case X86::VPTERNLOGQZrrikz:
  case X86::VPTERNLOGQZrmikz:
  case X86::VPTERNLOGQZ128rrikz:
  case X86::VPTERNLOGQZ128rmikz:
  case X86::VPTERNLOGQZ256rrikz:
  case X86::VPTERNLOGQZ256rmikz:
  case X86::VPTERNLOGDZ128rmbi:
  case X86::VPTERNLOGDZ256rmbi:
  case X86::VPTERNLOGDZrmbi:
  case X86::VPTERNLOGQZ128rmbi:
  case X86::VPTERNLOGQZ256rmbi:
  case X86::VPTERNLOGQZrmbi:
  case X86::VPTERNLOGDZ128rmbikz:
  case X86::VPTERNLOGDZ256rmbikz:
  case X86::VPTERNLOGDZrmbikz:
  case X86::VPTERNLOGQZ128rmbikz:
  case X86::VPTERNLOGQZ256rmbikz:
  case X86::VPTERNLOGQZrmbikz: {
    WorkingMI = CloneIfNew(MI);
    commuteVPTERNLOG(*WorkingMI, OpIdx1, OpIdx2);
    break;
  }
  default:
    if (isCommutableVPERMV3Instruction(Opc)) {
      WorkingMI = CloneIfNew(MI);
      WorkingMI->setDesc(get(getCommutedVPERMV3Opcode(Opc)));
      break;
    }

    if (auto *FMA3Group = getFMA3Group(Opc, MI.getDesc().TSFlags)) {
      WorkingMI = CloneIfNew(MI);
      WorkingMI->setDesc(
          get(getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group)));
      break;
    }
    break;
  }
bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
                                                 unsigned &SrcOpIdx1,
                                                 unsigned &SrcOpIdx2,
                                                 bool IsIntrinsic) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  unsigned FirstCommutableVecOp = 1;
  unsigned LastCommutableVecOp = 3;
  unsigned KMaskOp = -1U;
  if (X86II::isKMasked(TSFlags)) {
    // The k-mask operand has index = 2 for masked and zero-masked operations.
    KMaskOp = 2;

    // For merge-masked operations, operand 1 supplies the elements for which
    // the mask bit is 0, so it cannot be commuted.
    if (X86II::isKMergeMasked(TSFlags))
      FirstCommutableVecOp = 3;

    LastCommutableVecOp++;
  } else if (IsIntrinsic) {
    // Commuting the first operand of an intrinsic instruction isn't possible
    // unless we can prove that only the lowest element of the result is used.
    FirstCommutableVecOp = 2;
  }

  if (isMem(MI, LastCommutableVecOp))
    LastCommutableVecOp--;

  // Only the first RegOpsNum operands are commutable.
  // 'CommuteAnyOperandIndex' means the caller is free to choose the operands.
  if (SrcOpIdx1 != CommuteAnyOperandIndex &&
      (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
       SrcOpIdx1 == KMaskOp))
    return false;
  if (SrcOpIdx2 != CommuteAnyOperandIndex &&
      (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
       SrcOpIdx2 == KMaskOp))
    return false;

  // Look for two different register operands assumed to be commutable
  // regardless of the FMA opcode; the opcode is adjusted later.
  if (SrcOpIdx1 == CommuteAnyOperandIndex ||
      SrcOpIdx2 == CommuteAnyOperandIndex) {
    unsigned CommutableOpIdx2 = SrcOpIdx2;

    if (SrcOpIdx1 == SrcOpIdx2)
      // Both operands are free; default one of them to the last register
      // operand of the instruction.
      CommutableOpIdx2 = LastCommutableVecOp;
    else if (SrcOpIdx2 == CommuteAnyOperandIndex)
      // Only one of operands is not fixed.
      CommutableOpIdx2 = SrcOpIdx1;

    // CommutableOpIdx2 is well defined; pick a different commutable operand.
    Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();

    unsigned CommutableOpIdx1;
    for (CommutableOpIdx1 = LastCommutableVecOp;
         CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
      // Just ignore and skip the k-mask operand.
      if (CommutableOpIdx1 == KMaskOp)
        continue;

      // The commuted operands must differ; otherwise commuting is a no-op.
      if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
        break;
    }

    // No appropriate commutable operands were found.
    if (CommutableOpIdx1 < FirstCommutableVecOp)
      return false;

    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
                              CommutableOpIdx2))
      return false;
  }

  return true;
}
bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
                                         unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  const MCInstrDesc &Desc = MI.getDesc();
  if (!Desc.isCommutable())
    return false;

  switch (MI.getOpcode()) {
  case X86::CMPSDrri:
  case X86::CMPSSrri:
  case X86::CMPPDrri:
  case X86::CMPPSrri:
  case X86::VCMPSDrri:
  case X86::VCMPSSrri:
  case X86::VCMPPDrri:
  case X86::VCMPPSrri:
  case X86::VCMPPDYrri:
  case X86::VCMPPSYrri:
  case X86::VCMPSDZrri:
  case X86::VCMPSSZrri:
  case X86::VCMPPDZrri:
  case X86::VCMPPSZrri:
  case X86::VCMPSHZrri:
  case X86::VCMPPHZrri:
  case X86::VCMPPHZ128rri:
  case X86::VCMPPHZ256rri:
  case X86::VCMPPDZ128rri:
  case X86::VCMPPSZ128rri:
  case X86::VCMPPDZ256rri:
  case X86::VCMPPSZ256rri:
  case X86::VCMPPDZrrik:
  case X86::VCMPPSZrrik:
  case X86::VCMPPDZ128rrik:
  case X86::VCMPPSZ128rrik:
  case X86::VCMPPDZ256rrik:
  case X86::VCMPPSZ256rrik: {
    unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;

    // Float comparison can be safely commuted for
    // Ordered/Unordered/Equal/NotEqual tests.
    unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
    switch (Imm) {
    case 0x00: // EQUAL
    case 0x03: // UNORDERED
    case 0x04: // NOT EQUAL
    case 0x07: // ORDERED
      // The indices of the commutable operands are 1 and 2 (or 2 and 3
      // when masked).
      return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
                                  2 + OpOffset);
    }
    return false;
  }
  case X86::SHUFPDrri:
    // We can commute this to MOVSD.
    if (MI.getOperand(3).getImm() == 0x02)
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::MOVHLPSrr:
  case X86::UNPCKHPDrr:
  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
  case X86::VMOVHLPSZrr:
  case X86::VUNPCKHPDZ128rr:
    if (Subtarget.hasSSE2())
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::VPTERNLOGDZrri:
  case X86::VPTERNLOGDZrmi:
  case X86::VPTERNLOGDZ128rri:
  case X86::VPTERNLOGDZ128rmi:
  case X86::VPTERNLOGDZ256rri:
  case X86::VPTERNLOGDZ256rmi:
  case X86::VPTERNLOGQZrri:
  case X86::VPTERNLOGQZrmi:
  case X86::VPTERNLOGQZ128rri:
  case X86::VPTERNLOGQZ128rmi:
  case X86::VPTERNLOGQZ256rri:
  case X86::VPTERNLOGQZ256rmi:
  case X86::VPTERNLOGDZrrik:
  case X86::VPTERNLOGDZ128rrik:
  case X86::VPTERNLOGDZ256rrik:
  case X86::VPTERNLOGQZrrik:
  case X86::VPTERNLOGQZ128rrik:
  case X86::VPTERNLOGQZ256rrik:
  case X86::VPTERNLOGDZrrikz:
  case X86::VPTERNLOGDZrmikz:
  case X86::VPTERNLOGDZ128rrikz:
  case X86::VPTERNLOGDZ128rmikz:
  case X86::VPTERNLOGDZ256rrikz:
  case X86::VPTERNLOGDZ256rmikz:
  case X86::VPTERNLOGQZrrikz:
  case X86::VPTERNLOGQZrmikz:
  case X86::VPTERNLOGQZ128rrikz:
  case X86::VPTERNLOGQZ128rmikz:
  case X86::VPTERNLOGQZ256rrikz:
  case X86::VPTERNLOGQZ256rmikz:
  case X86::VPTERNLOGDZ128rmbi:
  case X86::VPTERNLOGDZ256rmbi:
  case X86::VPTERNLOGDZrmbi:
  case X86::VPTERNLOGQZ128rmbi:
  case X86::VPTERNLOGQZ256rmbi:
  case X86::VPTERNLOGQZrmbi:
  case X86::VPTERNLOGDZ128rmbikz:
  case X86::VPTERNLOGDZ256rmbikz:
  case X86::VPTERNLOGDZrmbikz:
  case X86::VPTERNLOGQZ128rmbikz:
  case X86::VPTERNLOGQZ256rmbikz:
  case X86::VPTERNLOGQZrmbikz:
    return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
  case X86::VPDPWSSDYrr:
  case X86::VPDPWSSDrr:
  case X86::VPDPWSSDSYrr:
  case X86::VPDPWSSDSrr:
  case X86::VPDPWUUDrr:
  case X86::VPDPWUUDYrr:
  case X86::VPDPWUUDSrr:
  case X86::VPDPWUUDSYrr:
  case X86::VPDPBSSDSrr:
  case X86::VPDPBSSDSYrr:
  case X86::VPDPBSSDrr:
  case X86::VPDPBSSDYrr:
  case X86::VPDPBUUDSrr:
  case X86::VPDPBUUDSYrr:
  case X86::VPDPBUUDrr:
  case X86::VPDPBUUDYrr:
  case X86::VPDPBSSDSZ128r:
  case X86::VPDPBSSDSZ128rk:
  case X86::VPDPBSSDSZ128rkz:
  case X86::VPDPBSSDSZ256r:
  case X86::VPDPBSSDSZ256rk:
  case X86::VPDPBSSDSZ256rkz:
  case X86::VPDPBSSDSZr:
  case X86::VPDPBSSDSZrk:
  case X86::VPDPBSSDSZrkz:
  case X86::VPDPBSSDZ128r:
  case X86::VPDPBSSDZ128rk:
  case X86::VPDPBSSDZ128rkz:
  case X86::VPDPBSSDZ256r:
  case X86::VPDPBSSDZ256rk:
  case X86::VPDPBSSDZ256rkz:
  case X86::VPDPBSSDZr:
  case X86::VPDPBSSDZrk:
  case X86::VPDPBSSDZrkz:
  case X86::VPDPBUUDSZ128r:
  case X86::VPDPBUUDSZ128rk:
  case X86::VPDPBUUDSZ128rkz:
  case X86::VPDPBUUDSZ256r:
  case X86::VPDPBUUDSZ256rk:
  case X86::VPDPBUUDSZ256rkz:
  case X86::VPDPBUUDSZr:
  case X86::VPDPBUUDSZrk:
  case X86::VPDPBUUDSZrkz:
  case X86::VPDPBUUDZ128r:
  case X86::VPDPBUUDZ128rk:
  case X86::VPDPBUUDZ128rkz:
  case X86::VPDPBUUDZ256r:
  case X86::VPDPBUUDZ256rk:
  case X86::VPDPBUUDZ256rkz:
  case X86::VPDPBUUDZr:
  case X86::VPDPBUUDZrk:
  case X86::VPDPBUUDZrkz:
  case X86::VPDPWSSDZ128r:
  case X86::VPDPWSSDZ128rk:
  case X86::VPDPWSSDZ128rkz:
  case X86::VPDPWSSDZ256r:
  case X86::VPDPWSSDZ256rk:
  case X86::VPDPWSSDZ256rkz:
  case X86::VPDPWSSDZr:
  case X86::VPDPWSSDZrk:
  case X86::VPDPWSSDZrkz:
  case X86::VPDPWSSDSZ128r:
  case X86::VPDPWSSDSZ128rk:
  case X86::VPDPWSSDSZ128rkz:
  case X86::VPDPWSSDSZ256r:
  case X86::VPDPWSSDSZ256rk:
  case X86::VPDPWSSDSZ256rkz:
  case X86::VPDPWSSDSZr:
  case X86::VPDPWSSDSZrk:
  case X86::VPDPWSSDSZrkz:
  case X86::VPDPWUUDZ128r:
  case X86::VPDPWUUDZ128rk:
  case X86::VPDPWUUDZ128rkz:
  case X86::VPDPWUUDZ256r:
  case X86::VPDPWUUDZ256rk:
  case X86::VPDPWUUDZ256rkz:
  case X86::VPDPWUUDZr:
  case X86::VPDPWUUDZrk:
  case X86::VPDPWUUDZrkz:
  case X86::VPDPWUUDSZ128r:
  case X86::VPDPWUUDSZ128rk:
  case X86::VPDPWUUDSZ128rkz:
  case X86::VPDPWUUDSZ256r:
  case X86::VPDPWUUDSZ256rk:
  case X86::VPDPWUUDSZ256rkz:
  case X86::VPDPWUUDSZr:
  case X86::VPDPWUUDSZrk:
  case X86::VPDPWUUDSZrkz:
  case X86::VPMADD52HUQrr:
  case X86::VPMADD52HUQYrr:
  case X86::VPMADD52HUQZ128r:
  case X86::VPMADD52HUQZ128rk:
  case X86::VPMADD52HUQZ128rkz:
  case X86::VPMADD52HUQZ256r:
  case X86::VPMADD52HUQZ256rk:
  case X86::VPMADD52HUQZ256rkz:
  case X86::VPMADD52HUQZr:
  case X86::VPMADD52HUQZrk:
  case X86::VPMADD52HUQZrkz:
  case X86::VPMADD52LUQrr:
  case X86::VPMADD52LUQYrr:
  case X86::VPMADD52LUQZ128r:
  case X86::VPMADD52LUQZ128rk:
  case X86::VPMADD52LUQZ128rkz:
  case X86::VPMADD52LUQZ256r:
  case X86::VPMADD52LUQZ256rk:
  case X86::VPMADD52LUQZ256rkz:
  case X86::VPMADD52LUQZr:
  case X86::VPMADD52LUQZrk:
  case X86::VPMADD52LUQZrkz:
  case X86::VFMADDCPHZr:
  case X86::VFMADDCPHZrk:
  case X86::VFMADDCPHZrkz:
  case X86::VFMADDCPHZ128r:
  case X86::VFMADDCPHZ128rk:
  case X86::VFMADDCPHZ128rkz:
  case X86::VFMADDCPHZ256r:
  case X86::VFMADDCPHZ256rk:
  case X86::VFMADDCPHZ256rkz:
  case X86::VFMADDCSHZr:
  case X86::VFMADDCSHZrk:
  case X86::VFMADDCSHZrkz: {
    unsigned CommutableOpIdx1 = 2;
    unsigned CommutableOpIdx2 = 3;
    if (X86II::isKMasked(Desc.TSFlags)) {
      // Skip the mask register operand.
      ++CommutableOpIdx1;
      ++CommutableOpIdx2;
    }
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
                              CommutableOpIdx2))
      return false;
    if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
      // No idea.
      return false;
    return true;
  }
  default:
    const X86InstrFMA3Group *FMA3Group =
        getFMA3Group(MI.getOpcode(), MI.getDesc().TSFlags);
    if (FMA3Group)
      return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
                                           FMA3Group->isIntrinsic());

    // Handle masked instructions: skip over the mask input and, where
    // present, the preserved input.
    if (X86II::isKMasked(Desc.TSFlags)) {
      // First assume that the first input is the mask operand and skip it.
      unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
      unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
      // Check if the first input is tied and adjust the indices accordingly.
      if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
                                             MCOI::TIED_TO) != -1)) {
        if (X86II::isKMergeMasked(Desc.TSFlags)) {
          ++CommutableOpIdx1;
          ++CommutableOpIdx2;
        } else {
          --CommutableOpIdx1;
        }
      }

      if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
                                CommutableOpIdx2))
        return false;

      if (!MI.getOperand(SrcOpIdx1).isReg() ||
          !MI.getOperand(SrcOpIdx2).isReg())
        // No idea.
        return false;
      return true;
    }

    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
  }
  return false;
}
static bool isConvertibleLEA(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
      Opcode != X86::LEA64_32r)
    return false;

  const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
      Scale.getImm() > 1)
    return false;

  return true;
}

bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
  unsigned Opcode = MI.getOpcode();
  if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
    return false;

X86::CondCode X86::getCondFromMI(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  if (!(X86::isJCC(Opcode) || X86::isSETCC(Opcode) || X86::isSETZUCC(Opcode) ||
        X86::isCMOVCC(Opcode) || X86::isCFCMOVCC(Opcode) ||
        X86::isCCMPCC(Opcode) || X86::isCTESTCC(Opcode)))
    return X86::COND_INVALID;

X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
  return X86::isSETCC(MI.getOpcode()) || X86::isSETZUCC(MI.getOpcode())
             ? getCondFromMI(MI)
             : X86::COND_INVALID;
}

X86::CondCode X86::getCondFromCCMP(const MachineInstr &MI) {
  return X86::isCCMPCC(MI.getOpcode()) || X86::isCTESTCC(MI.getOpcode())
             ? getCondFromMI(MI)
             : X86::COND_INVALID;
}

  // Default flag-value encoding for CCMP/CTEST: PF shares the CF bit.
  enum { CF = 1, ZF = 2, SF = 4, OF = 8, PF = CF };
#define GET_X86_NF_TRANSFORM_TABLE
#define GET_X86_ND2NONND_TABLE
#include "X86GenInstrMapping.inc"

static unsigned getNewOpcFromTable(ArrayRef<X86TableEntry> Table,
                                   unsigned Opc) {
  const auto I = llvm::lower_bound(Table, Opc);
  return (I == Table.end() || I->OldOpc != Opc) ? 0U : I->NewOpc;
}

std::pair<X86::CondCode, bool>
X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
  X86::CondCode CC = X86::COND_INVALID;
  bool NeedSwap = false;
  switch (Predicate) {
  default:
    break;
    // The predicate-to-condition-code mapping is elided here; some
    // predicates additionally require the operands to be swapped.
  }
  return std::make_pair(CC, NeedSwap);
}

#define GET_ND_IF_ENABLED(OPC) (HasNDD ? OPC##_ND : OPC)
  switch (Imm & 0x3) {

static unsigned getVectorRegisterWidth(const MCOperandInfo &Info) {
  if (Info.RegClass == X86::VR128RegClassID ||
      Info.RegClass == X86::VR128XRegClassID)
    return 128;
  if (Info.RegClass == X86::VR256RegClassID ||
      Info.RegClass == X86::VR256XRegClassID)
    return 256;
  if (Info.RegClass == X86::VR512RegClassID)
    return 512;
  return 0;
}

static bool isX87Reg(unsigned Reg) {
  return (Reg == X86::FPCW || Reg == X86::FPSW ||
          (Reg >= X86::ST0 && Reg <= X86::ST7));
}
/// Check if the instruction is an X87 instruction.
bool X86::isX87Instruction(MachineInstr &MI) {
  // Calls and inline asm def the X87 control word via regmasks, so they must
  // be excluded explicitly.
  if (MI.isCall() || MI.isInlineAsm())
    return false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (isX87Reg(MO.getReg()))
      return true;
  }
  return false;
}

int X86::getFirstAddrOperandIdx(const MachineInstr &MI) {
  const auto IsMemOp = [](const MCOperandInfo &OpInfo) {
    return OpInfo.OperandType == MCOI::OPERAND_MEMORY;
  };

  const MCInstrDesc &Desc = MI.getDesc();
  int MemRefIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (MemRefIdx >= 0)
    return MemRefIdx + X86II::getOperandBias(Desc);
#ifdef EXPENSIVE_CHECKS
  assert(none_of(Desc.operands(), IsMemOp) &&
         "Got false negative from X86II::getMemoryOperandNo()!");
#endif

  unsigned NumOps = Desc.getNumOperands();
  if (NumOps == 0) {
#ifdef EXPENSIVE_CHECKS
    assert(none_of(Desc.operands(), IsMemOp) &&
           "Expected no operands to have OPERAND_MEMORY type!");
#endif
    return -1;
  }

  for (unsigned I = 0; I != NumOps; ++I) {
    if (IsMemOp(Desc.operands()[I])) {
#ifdef EXPENSIVE_CHECKS
      assert(std::all_of(Desc.operands().begin() + I,
                         Desc.operands().begin() + I + X86::AddrNumOperands,
                         IsMemOp) &&
             "Expected all five operands in the memory reference to have "
             "OPERAND_MEMORY type!");
#endif
      return I;
    }
  }

  return -1;
}

const Constant *X86::getConstantFromPool(const MachineInstr &MI,
                                         unsigned OpNo) {
  assert(MI.getNumOperands() >= (OpNo + X86::AddrNumOperands) &&
         "Unexpected number of operands!");

  const MachineOperand &Index = MI.getOperand(OpNo + X86::AddrIndexReg);
  if (!Index.isReg() || Index.getReg() != X86::NoRegister)
    return nullptr;

  const MachineOperand &Disp = MI.getOperand(OpNo + X86::AddrDisp);
  if (!Disp.isCPI() || Disp.getOffset() != 0)
    return nullptr;

  ArrayRef<MachineConstantPoolEntry> Constants =
      MI.getParent()->getParent()->getConstantPool()->getConstants();
  const MachineConstantPoolEntry &ConstantEntry = Constants[Disp.getIndex()];

  // Machine-specific constant pool entries are not handled here.
  if (ConstantEntry.isMachineConstantPoolEntry())
    return nullptr;

  return ConstantEntry.Val.ConstVal;
}
bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
    return true;
  default:
    return false;
  }
}

bool X86InstrInfo::canMakeTailCallConditional(
    SmallVectorImpl<MachineOperand> &BranchCond,
    const MachineInstr &TailCall) const {
  // Retpoline thunk calls must stay unconditional.
  if (const MachineOperand &MO = TailCall.getOperand(0); MO.isSymbol()) {
    StringRef Symbol(MO.getSymbolName());
    if (Symbol == "__x86_indirect_thunk_r11")
      return false;
  }

  if (TailCall.getOpcode() != X86::TCRETURNdi &&
      TailCall.getOpcode() != X86::TCRETURNdi64) {
    // Only direct tail calls can be done with a conditional branch.
    return false;
  }

  const X86MachineFunctionInfo *X86FI =
      TailCall.getMF()->getInfo<X86MachineFunctionInfo>();
  if (X86FI->getTCReturnAddrDelta() != 0 ||
      TailCall.getOperand(1).getImm() != 0) {
    // A conditional tail call cannot do any stack adjustment.
    return false;
  }

  return true;
}

void X86InstrInfo::replaceBranchWithTailCall(
    MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
    const MachineInstr &TailCall) const {
  assert(canMakeTailCallConditional(BranchCond, TailCall));

  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugInstr())
      continue;
    if (!I->isBranch())
      assert(0 && "Can't find the branch to replace!");

    X86::CondCode CC = X86::getCondFromBranch(*I);
    assert(BranchCond.size() == 1);
    if (CC != BranchCond[0].getImm())
      continue;

    break;
  }

  unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
                                                         : X86::TCRETURNdi64cc;

  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
  MIB->addOperand(TailCall.getOperand(0)); // Destination.
  MIB.addImm(0);                           // Stack offset (not used).
  MIB->addOperand(BranchCond[0]);          // Condition.
  MIB.copyImplicitOps(TailCall);           // Regmask and (imp-used) parameters.

  // Add implicit uses and defs of all live regs potentially clobbered by the
  // call, so they still appear live across it.
  LivePhysRegs LiveRegs(getRegisterInfo());
  LiveRegs.addLiveOuts(MBB);
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
  LiveRegs.stepForward(*MIB, Clobbers);
  for (const auto &C : Clobbers) {
    MIB.addReg(C.first, RegState::Implicit);
    MIB.addReg(C.first, RegState::Implicit | RegState::Define);
  }

  I->eraseFromParent();
}

static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
                                            MachineBasicBlock *TBB) {
  MachineBasicBlock *FallthroughBB = nullptr;
  for (MachineBasicBlock *Succ : MBB->successors()) {
    if (Succ->isEHPad() || (Succ == TBB && FallthroughBB))
      continue;
    if (FallthroughBB && FallthroughBB != TBB)
      return nullptr;
    FallthroughBB = Succ;
  }
  return FallthroughBB;
}
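// getFallThroughMBB scans the successors of MBB for the unique block that
// can serve as the fall-through: EH pads are skipped (as is TBB once a
// candidate exists), and finding two distinct candidates means there is no
// unique fall-through, so nullptr is returned.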
bool X86InstrInfo::analyzeBranchImpl(
    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
    SmallVectorImpl<MachineOperand> &Cond,
    SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {

  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugInstr())
      continue;

    // When we see a non-terminator instruction, we're done.
    if (!isUnpredicatedTerminator(*I))
      break;

    // A terminator that isn't a branch can't easily be handled here.
    if (!I->isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP_1) {
      UnCondBrIter = I;

      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      MBB.erase(std::next(I), MBB.end());

      Cond.clear();
      FBB = nullptr;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = nullptr;
        I->eraseFromParent();
        I = MBB.end();
        UnCondBrIter = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    X86::CondCode BranchCode = X86::getCondFromBranch(*I);
    if (BranchCode == X86::COND_INVALID)
      return true; // Can't handle indirect branch.

    // An undef EFLAGS operand cannot be preserved; abort.
    if (I->findRegisterUseOperand(X86::EFLAGS, nullptr)->isUndef())
      return true;

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      CondBranches.push_back(&*I);
      continue;
    }

    // Handle subsequent conditional branches; only some idioms are supported.
    assert(Cond.size() == 1);
    assert(TBB);

    // If the conditions are the same, we can leave them alone.
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    auto NewTBB = I->getOperand(0).getMBB();
    if (OldBranchCode == BranchCode && TBB == NewTBB)
      continue;

    // If they differ, see if they fit one of the known patterns.
    if (TBB == NewTBB &&
        ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
         (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
      BranchCode = X86::COND_NE_OR_P;
    } else
      return true;

    // Update the MachineOperand.
    Cond[0].setImm(BranchCode);
    CondBranches.push_back(&*I);
  }

  return false;
}

bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  SmallVector<MachineInstr *, 4> CondBranches;
  return analyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
}
  assert(MemRefBegin >= 0 && "instr should have memory operand");

static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI,
                                    Register Reg) {
  if (!Reg.isVirtual())
    return -1;
  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (MI == nullptr)
    return -1;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != X86::LEA64r && Opcode != X86::LEA32r)
    return -1;
  return getJumpTableIndexFromAddr(*MI);
}

int X86InstrInfo::getJumpTableIndex(const MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  // Switch-jump pattern for non-PIC code looks like:
  //   JMP64m $noreg, 8, %X, %jump-table.X, $noreg
  if (Opcode == X86::JMP64m || Opcode == X86::JMP32m) {
    return getJumpTableIndexFromAddr(MI);
  }
  // The PIC pattern computes the target with an ADD of the jump-table LEA:
  //   %0 = LEA64r $rip, 1, $noreg, %jump-table.X
  //   %2 = ADD64rr %1, %0
  //   JMP64r %2
  if (Opcode == X86::JMP64r || Opcode == X86::JMP32r) {
    Register Reg = MI.getOperand(0).getReg();
    if (!Reg.isVirtual())
      return -1;
    const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
    MachineInstr *Add = MRI.getUniqueVRegDef(Reg);
    if (Add == nullptr)
      return -1;
    if (Add->getOpcode() != X86::ADD64rr && Add->getOpcode() != X86::ADD32rr)
      return -1;
    int JTI = getJumpTableIndexFromReg(MRI, Add->getOperand(1).getReg());
    if (JTI >= 0)
      return JTI;
    return getJumpTableIndexFromReg(MRI, Add->getOperand(2).getReg());
  }
  return -1;
}
bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                          MachineBranchPredicate &MBP,
                                          bool AllowModify) const {
  using namespace std::placeholders;

  SmallVector<MachineOperand, 4> Cond;
  SmallVector<MachineInstr *, 4> CondBranches;
  if (analyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
                        AllowModify))
    return true;

  if (Cond.size() != 1)
    return true;

  assert(MBP.TrueDest && "expected!");

  if (!MBP.FalseDest)
    MBP.FalseDest = MBB.getNextNode();

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  MachineInstr *ConditionDef = nullptr;
  bool SingleUseCondition = true;

  for (MachineInstr &MI : llvm::drop_begin(llvm::reverse(MBB))) {
    if (MI.modifiesRegister(X86::EFLAGS, TRI)) {
      ConditionDef = &MI;
      break;
    }
    if (MI.readsRegister(X86::EFLAGS, TRI))
      SingleUseCondition = false;
  }

  if (!ConditionDef)
    return true;

  if (SingleUseCondition) {
    for (auto *Succ : MBB.successors())
      if (Succ->isLiveIn(X86::EFLAGS))
        SingleUseCondition = false;
  }

  MBP.ConditionDef = ConditionDef;
  MBP.SingleUseCondition = SingleUseCondition;

  // Currently we only recognize the simple pattern:
  //   test %reg, %reg
  //   je %label
  const unsigned TestOpcode =
      Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;

  if (ConditionDef->getOpcode() == TestOpcode &&
      ConditionDef->getNumOperands() == 3 &&
      ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
      (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
    MBP.LHS = ConditionDef->getOperand(0);
    MBP.RHS = MachineOperand::CreateImm(0);
    MBP.Predicate = Cond[0].getImm() == X86::COND_NE
                        ? MachineBranchPredicate::PRED_NE
                        : MachineBranchPredicate::PRED_EQ;
    return false;
  }

  return true;
}
unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                    int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.end();
  unsigned Count = 0;
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugInstr())
      continue;
    if (I->getOpcode() != X86::JMP_1 &&
        X86::getCondFromBranch(*I) == X86::COND_INVALID)
      break;
    // Remove the branch.
    I->eraseFromParent();
    I = MBB.end();
    ++Count;
  }

  return Count;
}

unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *TBB,
                                    MachineBasicBlock *FBB,
                                    ArrayRef<MachineOperand> Cond,
                                    const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "X86 branch conditions have one component!");
  assert(!BytesAdded && "code size not handled");

  if (Cond.empty()) {
    // Unconditional branch?
    assert(!FBB && "Unconditional branch with multiple successors!");
    BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
    return 1;
  }

  // If FBB is null, it is implied to be a fall-through block.
  bool FallThru = FBB == nullptr;

  // For a multi-branch condition, locate the fall-through block explicitly.
  if (FBB == nullptr) {
    FBB = getFallThroughMBB(&MBB, TBB);
    assert(FBB && "MBB cannot be the last block in function when the false "
                  "body is a fall-through.");
  }
bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                   ArrayRef<MachineOperand> Cond,
                                   Register DstReg, Register TrueReg,
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  // Not all subtargets have cmov instructions.
  if (!Subtarget.canUseCMOV())
    return false;
  if (Cond.size() != 1)
    return false;

  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // We have cmov instructions for 16, 32, and 64 bit general purpose
  // registers.
  if (X86::GR16RegClass.hasSubClassEq(RC) ||
      X86::GR32RegClass.hasSubClassEq(RC) ||
      X86::GR64RegClass.hasSubClassEq(RC)) {
    CondCycles = 2;
    TrueCycles = 2;
    FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                const DebugLoc &DL, Register DstReg,
                                ArrayRef<MachineOperand> Cond, Register TrueReg,
                                Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
  assert(Cond.size() == 1 && "Invalid Cond array");
  unsigned Opc =
      X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
                         /*HasMemoryOperand=*/false, Subtarget.hasNDD());
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(FalseReg)
      .addReg(TrueReg)
      .addImm(Cond[0].getImm());
}
static bool isHReg(unsigned Reg) {
  return X86::GR8_ABCD_HRegClass.contains(Reg);
}

// Try and copy between VR128/VR64 and GR64 registers.
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
                                        const X86Subtarget &Subtarget) {
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool HasEGPR = Subtarget.hasEGPR();

  // SrcReg(MaskReg) -> DestReg(GR64/GR32). All KMASK register classes hold
  // the same k registers, so any one can be tested against.
  if (X86::VK16RegClass.contains(SrcReg)) {
    if (X86::GR64RegClass.contains(DestReg)) {
      assert(Subtarget.hasBWI());
      return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
    }
    if (X86::GR32RegClass.contains(DestReg))
      return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
                                : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
  }

  // SrcReg(GR64/GR32) -> DestReg(MaskReg)
  if (X86::VK16RegClass.contains(DestReg)) {
    if (X86::GR64RegClass.contains(SrcReg)) {
      assert(Subtarget.hasBWI());
      return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
    }
    if (X86::GR32RegClass.contains(SrcReg))
      return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
                                : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
  }

  // SrcReg(VR128/VR64) <-> DestReg(GR64)
  if (X86::GR64RegClass.contains(DestReg)) {
    if (X86::VR128XRegClass.contains(SrcReg))
      // Copy from a VR128 register to a GR64 register.
      return HasAVX512 ? X86::VMOVPQIto64Zrr
             : HasAVX  ? X86::VMOVPQIto64rr
                       : X86::MOVPQIto64rr;
    if (X86::VR64RegClass.contains(SrcReg))
      // Copy from a VR64 register to a GR64 register.
      return X86::MMX_MOVD64from64rr;
  } else if (X86::GR64RegClass.contains(SrcReg)) {
    // Copy from a GR64 register to a VR128 register.
    if (X86::VR128XRegClass.contains(DestReg))
      return HasAVX512 ? X86::VMOV64toPQIZrr
             : HasAVX  ? X86::VMOV64toPQIrr
                       : X86::MOV64toPQIrr;
    // Copy from a GR64 register to a VR64 register.
    if (X86::VR64RegClass.contains(DestReg))
      return X86::MMX_MOVD64to64rr;
  }

  // SrcReg(VR128) <-> DestReg(GR32)
  if (X86::GR32RegClass.contains(DestReg) &&
      X86::VR128XRegClass.contains(SrcReg))
    // Copy from a VR128 register to a GR32 register.
    return HasAVX512 ? X86::VMOVPDI2DIZrr
           : HasAVX  ? X86::VMOVPDI2DIrr
                     : X86::MOVPDI2DIrr;

  if (X86::VR128XRegClass.contains(DestReg) &&
      X86::GR32RegClass.contains(SrcReg))
    // Copy from a GR32 register to a VR128 register.
    return HasAVX512 ? X86::VMOVDI2PDIZrr
           : HasAVX  ? X86::VMOVDI2PDIrr
                     : X86::MOVDI2PDIrr;
  return 0;
}
  // First deal with the normal symmetric copies.
  bool HasAVX = Subtarget.hasAVX();
  bool HasVLX = Subtarget.hasVLX();
  bool HasEGPR = Subtarget.hasEGPR();
  unsigned Opc = 0;
  if (X86::GR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV64rr;
  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV32rr;
  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV16rr;
  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move. Otherwise use a normal move.
    if ((isHReg(DestReg) || isHReg(SrcReg)) && Subtarget.is64Bit()) {
      Opc = X86::MOV8rr_NOREX;
      // Both operands must be encodable without an REX prefix.
      assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
             "8-bit H register can not be copied outside GR8_NOREX");
    } else
      Opc = X86::MOV8rr;
  } else if (X86::VR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MMX_MOVQ64rr;
  else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
    if (HasVLX)
      Opc = X86::VMOVAPSZ128rr;
    else if (X86::VR128RegClass.contains(DestReg, SrcReg))
      Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
    else {
      // If this is an extended register and we don't have VLX we need to use
      // a 512-bit move.
      Opc = X86::VMOVAPSZrr;
      const TargetRegisterInfo *TRI = &getRegisterInfo();
      DestReg =
          TRI->getMatchingSuperReg(DestReg, X86::sub_xmm, &X86::VR512RegClass);
      SrcReg =
          TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
    }
  } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
    if (HasVLX)
      Opc = X86::VMOVAPSZ256rr;
    else if (X86::VR256RegClass.contains(DestReg, SrcReg))
      Opc = X86::VMOVAPSYrr;
    else {
      // If this is an extended register and we don't have VLX we need to use
      // a 512-bit move.
      Opc = X86::VMOVAPSZrr;
      const TargetRegisterInfo *TRI = &getRegisterInfo();
      DestReg =
          TRI->getMatchingSuperReg(DestReg, X86::sub_ymm, &X86::VR512RegClass);
      SrcReg =
          TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
    }
  } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
    Opc = X86::VMOVAPSZrr;
  // All KMASK RegClasses hold the same k registers, can be tested against
  // anyone.
  else if (X86::VK16RegClass.contains(DestReg, SrcReg))
    Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
                             : (HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
  if (!Opc)
    Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);

  if (Opc) {
    BuildMI(MBB, MI, DL, get(Opc), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
    // EFLAGS copies should have been handled (or rejected) before reaching
    // this point; copying EFLAGS directly is not supported.
    report_fatal_error("Unable to copy EFLAGS physical register!");
  }

  LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to "
                    << RI.getName(DestReg) << '\n');
  report_fatal_error("Cannot emit physreg copy instruction");
}
std::optional<DestSourcePair>
X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg()) {
    // FIXME: Dirty hack for an apparent invariant that doesn't hold when the
    // destination has a subreg and is marked undef.
    if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
      return std::nullopt;
    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
  }
  return std::nullopt;
}
static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI) {
  if (STI.hasFP16())
    return Load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
  if (Load)
    return STI.hasAVX512() ? X86::VMOVSSZrm
           : STI.hasAVX()  ? X86::VMOVSSrm
                           : X86::MOVSSrm;
  else
    return STI.hasAVX512() ? X86::VMOVSSZmr
           : STI.hasAVX()  ? X86::VMOVSSmr
                           : X86::MOVSSmr;
}
static unsigned getLoadStoreRegOpcode(Register Reg,
                                      const TargetRegisterClass *RC,
                                      bool IsStackAligned,
                                      const X86Subtarget &STI, bool Load) {
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();
  bool HasEGPR = STI.hasEGPR();

  assert(RC != nullptr && "Invalid target register class");
  switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
  default:
    llvm_unreachable("Unknown spill size");
  case 1:
    assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
    if (STI.is64Bit())
      // Copying to or from a physical H register on x86-64 requires a NOREX
      // move. Otherwise use a normal move.
      if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
        return Load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
    return Load ? X86::MOV8rm : X86::MOV8mr;
  case 2:
    if (X86::VK16RegClass.hasSubClassEq(RC))
      return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
                  : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
    assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
    return Load ? X86::MOV16rm : X86::MOV16mr;
  case 4:
    if (X86::GR32RegClass.hasSubClassEq(RC))
      return Load ? X86::MOV32rm : X86::MOV32mr;
    if (X86::FR32XRegClass.hasSubClassEq(RC))
      return Load ? (HasAVX512 ? X86::VMOVSSZrm_alt
                     : HasAVX  ? X86::VMOVSSrm_alt
                               : X86::MOVSSrm_alt)
                  : (HasAVX512 ? X86::VMOVSSZmr
                     : HasAVX  ? X86::VMOVSSmr
                               : X86::MOVSSmr);
    if (X86::RFP32RegClass.hasSubClassEq(RC))
      return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
    if (X86::VK32RegClass.hasSubClassEq(RC)) {
      assert(STI.hasBWI() && "KMOVD requires BWI");
      return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
                  : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
    }
    // All of the mask pair classes have the same spill size, so the same
    // load/store pseudos work for all of them.
    if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
        X86::VK16PAIRRegClass.hasSubClassEq(RC))
      return Load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
    if (X86::FR16RegClass.hasSubClassEq(RC) ||
        X86::FR16XRegClass.hasSubClassEq(RC))
      return getLoadStoreOpcodeForFP16(Load, STI);
    llvm_unreachable("Unknown 4-byte regclass");
  case 8:
    if (X86::GR64RegClass.hasSubClassEq(RC))
      return Load ? X86::MOV64rm : X86::MOV64mr;
    if (X86::FR64XRegClass.hasSubClassEq(RC))
      return Load ? (HasAVX512 ? X86::VMOVSDZrm_alt
                     : HasAVX  ? X86::VMOVSDrm_alt
                               : X86::MOVSDrm_alt)
                  : (HasAVX512 ? X86::VMOVSDZmr
                     : HasAVX  ? X86::VMOVSDmr
                               : X86::MOVSDmr);
    if (X86::VR64RegClass.hasSubClassEq(RC))
      return Load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
    if (X86::RFP64RegClass.hasSubClassEq(RC))
      return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
    if (X86::VK64RegClass.hasSubClassEq(RC)) {
      assert(STI.hasBWI() && "KMOVQ requires BWI");
      return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
                  : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
    }
    llvm_unreachable("Unknown 8-byte regclass");
  case 10:
    assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
    return Load ? X86::LD_Fp80m : X86::ST_FpP80m;
  case 16:
    if (X86::VR128XRegClass.hasSubClassEq(RC)) {
      // If the stack is realigned we can use aligned stores.
      if (IsStackAligned)
        return Load ? (HasVLX      ? X86::VMOVAPSZ128rm
                       : HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX
                       : HasAVX    ? X86::VMOVAPSrm
                                   : X86::MOVAPSrm)
                    : (HasVLX      ? X86::VMOVAPSZ128mr
                       : HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX
                       : HasAVX    ? X86::VMOVAPSmr
                                   : X86::MOVAPSmr);
      else
        return Load ? (HasVLX      ? X86::VMOVUPSZ128rm
                       : HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX
                       : HasAVX    ? X86::VMOVUPSrm
                                   : X86::MOVUPSrm)
                    : (HasVLX      ? X86::VMOVUPSZ128mr
                       : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX
                       : HasAVX    ? X86::VMOVUPSmr
                                   : X86::MOVUPSmr);
    }
    llvm_unreachable("Unknown 16-byte regclass");
  case 32:
    assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
    // If the stack is realigned we can use aligned stores.
    if (IsStackAligned)
      return Load ? (HasVLX      ? X86::VMOVAPSZ256rm
                     : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                 : X86::VMOVAPSYrm)
                  : (HasVLX      ? X86::VMOVAPSZ256mr
                     : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                 : X86::VMOVAPSYmr);
    else
      return Load ? (HasVLX      ? X86::VMOVUPSZ256rm
                     : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                 : X86::VMOVUPSYrm)
                  : (HasVLX      ? X86::VMOVUPSZ256mr
                     : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                 : X86::VMOVUPSYmr);
  case 64:
    assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
    assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
    if (IsStackAligned)
      return Load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return Load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  case 1024:
    assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
    assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
#define GET_EGPR_IF_ENABLED(OPC) (STI.hasEGPR() ? OPC##_EVEX : OPC)
    return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
                : GET_EGPR_IF_ENABLED(X86::TILESTORED);
#undef GET_EGPR_IF_ENABLED
  }
}
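// Usage sketch: the spill/reload helpers below funnel through this function
// with Load = false for stores and Load = true for loads, so each register
// class keeps its load/store opcodes paired in a single switch.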
std::optional<ExtAddrMode>
X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
                                      const TargetRegisterInfo *TRI) const {
  const MCInstrDesc &Desc = MemI.getDesc();
  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (MemRefBegin < 0)
    return std::nullopt;

  MemRefBegin += X86II::getOperandBias(Desc);

  auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
  if (!BaseOp.isReg()) // Can be an MO_FrameIndex.
    return std::nullopt;

  const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
  // The displacement can be symbolic.
  if (!DispMO.isImm())
    return std::nullopt;

  ExtAddrMode AM;
  AM.BaseReg = BaseOp.getReg();
  AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg();
  AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
  AM.Displacement = DispMO.getImm();
  return AM;
}
    if (!isScale(MI.getOperand(MemRefBegin + X86::AddrScaleAmt))) {
      ErrInfo = "Scale factor in address must be 1, 2, 4 or 8";
      return false;
    }
    if (!isInt<32>(MI.getOperand(MemRefBegin + X86::AddrDisp).getImm())) {
      ErrInfo = "Displacement in address must fit into 32-bit signed "
                "integer";
      return false;
    }
bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
                                           const Register Reg,
                                           int64_t &ImmVal) const {
  Register MovReg = Reg;
  const MachineInstr *MovMI = &MI;

  // Follow the use-def chain of SUBREG_TO_REG to find the real
  // move-immediate instruction; this pattern is common on x86-64, e.g.:
  //   %8:gr32 = MOV32r0 implicit-def dead $eflags
  //   %6:gr64 = SUBREG_TO_REG 0, killed %8:gr32, %subreg.sub_32bit
  if (MI.isSubregToReg()) {
    if (!MI.getOperand(1).isImm())
      return false;
    unsigned FillBits = MI.getOperand(1).getImm();
    unsigned SubIdx = MI.getOperand(3).getImm();
    MovReg = MI.getOperand(2).getReg();
    if (SubIdx != X86::sub_32bit || FillBits != 0)
      return false;
    const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
    MovMI = MRI.getUniqueVRegDef(MovReg);
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() == X86::MOV32r0 &&
      MovMI->getOperand(0).getReg() == MovReg) {
    ImmVal = 0;
    return true;
  }

  if (MovMI->getOpcode() != X86::MOV32ri &&
      MovMI->getOpcode() != X86::MOV64ri &&
      MovMI->getOpcode() != X86::MOV32ri64 &&
      MovMI->getOpcode() != X86::MOV8ri)
    return false;

  ImmVal = MovMI->getOperand(1).getImm();
  return true;
}
bool X86InstrInfo::preservesZeroValueInReg(
    const MachineInstr *MI, const Register NullValueReg,
    const TargetRegisterInfo *TRI) const {
  if (!MI->modifiesRegister(NullValueReg, TRI))
    return true;
  switch (MI->getOpcode()) {
  // Shifting a zero by any amount leaves it zero, i.e. rax = shl rax, X.
  case X86::SHR64ri:
  case X86::SHR32ri:
  case X86::SHL64ri:
  case X86::SHL32ri:
    assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() &&
           "expected for shift opcode!");
    return MI->getOperand(0).getReg() == NullValueReg &&
           MI->getOperand(1).getReg() == NullValueReg;
  // Zero-extending a sub-register of NullValueReg into itself also preserves
  // the null value.
  case X86::MOV32rr:
    return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) {
      return TRI->isSubRegisterEq(NullValueReg, MO.getReg());
    });
  default:
    return false;
  }
}
bool X86InstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &MemOp,
    SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
    bool &OffsetIsScalable, LocationSize &Width,
    const TargetRegisterInfo *TRI) const {
  const MCInstrDesc &Desc = MemOp.getDesc();
  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (MemRefBegin < 0)
    return false;

  MemRefBegin += X86II::getOperandBias(Desc);

  const MachineOperand *BaseOp =
      &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
  if (!BaseOp->isReg()) // Can be an MO_FrameIndex.
    return false;

  if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
    return false;

  if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
      X86::NoRegister)
    return false;

  const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
  // The displacement can be symbolic.
  if (!DispMO.isImm())
    return false;

  Offset = DispMO.getImm();

  OffsetIsScalable = false;
  // FIXME: Relying on memoperands() may not be right for sizeless types.
  Width =
      !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;

  BaseOps.push_back(BaseOp);
  return true;
}
static unsigned getStoreRegOpcode(Register SrcReg,
                                  const TargetRegisterClass *RC,
                                  bool IsStackAligned,
                                  const X86Subtarget &STI) {
  return getLoadStoreRegOpcode(SrcReg, RC, IsStackAligned, STI, false);
}

static unsigned getLoadRegOpcode(Register DestReg,
                                 const TargetRegisterClass *RC,
                                 bool IsStackAligned, const X86Subtarget &STI) {
  return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
}

static bool isAMXOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case X86::TILELOADD:
  case X86::TILESTORED:
  case X86::TILELOADD_EVEX:
  case X86::TILESTORED_EVEX:
    return true;
  }
}

void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned Opc, Register Reg, int FrameIdx,
                                    bool isKill) const {
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected special opcode!");
  case X86::TILESTORED:
  case X86::TILESTORED_EVEX: {
    // tilestored %tmm, (%sp, %idx): AMX stores need a stride register, so
    // materialize a virtual register holding the stride first.
    MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
    BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
    MachineInstr *NewMI =
        addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
            .addReg(Reg, getKillRegState(isKill));
    MachineOperand &MO = NewMI->getOperand(X86::AddrIndexReg);
    MO.setReg(VirtReg);
    MO.setIsKill(true);
    break;
  }
  case X86::TILELOADD:
  case X86::TILELOADD_EVEX: {
    // tileloadd (%sp, %idx), %tmm: the symmetric case for reloads.
    MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
    BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
    MachineInstr *NewMI = addFrameReference(
        BuildMI(MBB, MI, DebugLoc(), get(Opc), Reg), FrameIdx);
    MachineOperand &MO = NewMI->getOperand(1 + X86::AddrIndexReg);
    MO.setReg(VirtReg);
    MO.setIsKill(true);
    break;
  }
  }
}
4782 "Stack slot too small for store");
4784 unsigned Alignment = std::max<uint32_t>(
TRI->getSpillSize(*RC), 16);
4806 "Load size exceeds stack slot");
4807 unsigned Alignment = std::max<uint32_t>(
TRI->getSpillSize(*RC), 16);
bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                  Register &SrcReg2, int64_t &CmpMask,
                                  int64_t &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case X86::CMP64ri32:
  case X86::CMP32ri:
  case X86::CMP16ri:
  case X86::CMP8ri:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    if (MI.getOperand(1).isImm()) {
      CmpMask = ~0;
      CmpValue = MI.getOperand(1).getImm();
    } else {
      CmpMask = CmpValue = 0;
    }
    return true;
  // A SUB against memory defines EFLAGS from its register operand only.
  CASE_ND(SUB64rm)
  CASE_ND(SUB32rm)
  CASE_ND(SUB16rm)
  CASE_ND(SUB8rm)
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = 0;
    CmpValue = 0;
    return true;
  CASE_ND(SUB64rr)
  CASE_ND(SUB32rr)
  CASE_ND(SUB16rr)
  CASE_ND(SUB8rr)
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = 0;
    CmpValue = 0;
    return true;
  CASE_ND(SUB64ri32)
  CASE_ND(SUB32ri)
  CASE_ND(SUB16ri)
  CASE_ND(SUB8ri)
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    if (MI.getOperand(2).isImm()) {
      CmpMask = ~0;
      CmpValue = MI.getOperand(2).getImm();
    } else {
      CmpMask = CmpValue = 0;
    }
    return true;
  case X86::CMP64rr:
  case X86::CMP32rr:
  case X86::CMP16rr:
  case X86::CMP8rr:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    CmpMask = 0;
    CmpValue = 0;
    return true;
  case X86::TEST8rr:
  case X86::TEST16rr:
  case X86::TEST32rr:
  case X86::TEST64rr:
    SrcReg = MI.getOperand(0).getReg();
    if (MI.getOperand(1).getReg() != SrcReg)
      return false;
    // Compare against zero.
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  }
  return false;
}
bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
                                        Register SrcReg, Register SrcReg2,
                                        int64_t ImmMask, int64_t ImmValue,
                                        const MachineInstr &OI, bool *IsSwapped,
                                        int64_t *ImmDelta) const {
  Register OISrcReg, OISrcReg2;
  int64_t OIMask, OIValue;
  switch (OI.getOpcode()) {
  case X86::CMP64rr:
  case X86::CMP32rr:
  case X86::CMP16rr:
  case X86::CMP8rr:
    // A register compare performs the same comparison if the operands match
    // in order, or the inverted comparison if they are swapped.
    if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
        OIMask != ImmMask || OIValue != ImmValue)
      return false;
    if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
      *IsSwapped = false;
      return true;
    }
    if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
      *IsSwapped = true;
      return true;
    }
    return false;
  case X86::CMP64ri32:
  case X86::CMP32ri:
  case X86::CMP16ri:
  case X86::CMP8ri:
  case X86::TEST64rr:
  case X86::TEST32rr:
  case X86::TEST16rr:
  case X86::TEST8rr: {
    if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
        SrcReg == OISrcReg && ImmMask == OIMask) {
      if (OIValue == ImmValue) {
        *ImmDelta = 0;
        return true;
      } else if (static_cast<uint64_t>(ImmValue) ==
                 static_cast<uint64_t>(OIValue) - 1) {
        *ImmDelta = -1;
        return true;
      } else if (static_cast<uint64_t>(ImmValue) ==
                 static_cast<uint64_t>(OIValue) + 1) {
        *ImmDelta = 1;
        return true;
      }
    }
    return false;
  }
  default:
    return false;
  }
}
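// The +/-1 ImmDelta case is what allows a later compare against C to reuse
// the flags of an earlier compare against C-1 or C+1, by relaxing the
// condition in the users (e.g. turning "less than" into "less than or
// equal") instead of keeping both compares.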
/// Check whether the definition can be converted to remove a comparison
/// against zero.
static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
                             bool &ClearsOverflowFlag) {
  NoSignFlag = false;
  ClearsOverflowFlag = false;

  // "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF
  // and x86 GOTNTPOFF relocations can convert an ADD to a LEA, which destroys
  // the flags, so this optimization must be skipped for those adds.
  if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
    unsigned Flags = MI.getOperand(5).getTargetFlags();
    if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
        Flags == X86II::MO_GOTNTPOFF)
      return false;
  }

  switch (MI.getOpcode()) {
  default:
    return false;
  case X86::LZCNT16rr:
  case X86::LZCNT16rm:
  case X86::LZCNT32rr:
  case X86::LZCNT32rm:
  case X86::LZCNT64rr:
  case X86::LZCNT64rm:
  case X86::POPCNT16rr:
  case X86::POPCNT16rm:
  case X86::POPCNT32rr:
  case X86::POPCNT32rm:
  case X86::POPCNT64rr:
  case X86::POPCNT64rm:
  case X86::TZCNT16rr:
  case X86::TZCNT16rm:
  case X86::TZCNT32rr:
  case X86::TZCNT32rm:
  case X86::TZCNT64rr:
  case X86::TZCNT64rm:
  case X86::BLSMSK32rr:
  case X86::BLSMSK32rm:
  case X86::BLSMSK64rr:
  case X86::BLSMSK64rm:
  case X86::BLCFILL32rr:
  case X86::BLCFILL32rm:
  case X86::BLCFILL64rr:
  case X86::BLCFILL64rm:
  case X86::BLCIC32rr:
  case X86::BLCIC32rm:
  case X86::BLCIC64rr:
  case X86::BLCIC64rm:
  case X86::BLCMSK32rr:
  case X86::BLCMSK32rm:
  case X86::BLCMSK64rr:
  case X86::BLCMSK64rm:
  case X86::BLSFILL32rr:
  case X86::BLSFILL32rm:
  case X86::BLSFILL64rr:
  case X86::BLSFILL64rm:
  case X86::BLSIC32rr:
  case X86::BLSIC32rm:
  case X86::BLSIC64rr:
  case X86::BLSIC64rm:
  case X86::T1MSKC32rr:
  case X86::T1MSKC32rm:
  case X86::T1MSKC64rr:
  case X86::T1MSKC64rm:
  case X86::TZMSK32rr:
  case X86::TZMSK32rm:
  case X86::TZMSK64rr:
  case X86::TZMSK64rm:
    // These instructions clear the overflow flag.
    ClearsOverflowFlag = true;
    return true;
  case X86::BEXTR32rr:
  case X86::BEXTR64rr:
  case X86::BEXTR32rm:
  case X86::BEXTR64rm:
  case X86::BEXTRI32ri:
  case X86::BEXTRI32mi:
  case X86::BEXTRI64ri:
  case X86::BEXTRI64mi:
    // BEXTR doesn't update the sign flag, so we can't rely on it.
    NoSignFlag = true;
    ClearsOverflowFlag = true;
    return true;
  }
}
/// Check whether the use can be converted to remove a comparison against
/// zero, returning the condition code the defining instruction itself makes
/// available.
static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return X86::COND_INVALID;
  case X86::LZCNT16rr:
  case X86::LZCNT32rr:
  case X86::LZCNT64rr:
    return X86::COND_B;
  case X86::POPCNT16rr:
  case X86::POPCNT32rr:
  case X86::POPCNT64rr:
    return X86::COND_E;
  case X86::TZCNT16rr:
  case X86::TZCNT32rr:
  case X86::TZCNT64rr:
    return X86::COND_B;
  case X86::BLSMSK32rr:
  case X86::BLSMSK64rr:
    return X86::COND_B;
  }
}
  unsigned NewOpcode = 0;
#define FROM_TO(A, B)                                                          \
  CASE_ND(A)                                                                   \
  NewOpcode = X86::B;                                                          \
  break;
#undef FROM_TO

#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
    return X86::TO;
    FROM_TO(CTEST64rr, CTEST64ri32)
#undef FROM_TO

  if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
      NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
  // The following code tries to remove the comparison by re-using the EFLAGS
  // of an earlier instruction that defines the same source register.
  bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);

  MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
  assert(SrcRegDef && "Must have a definition (SSA)");

  MachineInstr *MI = nullptr;
  MachineInstr *Sub = nullptr;
  MachineInstr *Movr0Inst = nullptr;
  bool NoSignFlag = false;
  bool ClearsOverflowFlag = false;
  bool ShouldUpdateCC = false;
  bool IsSwapped = false;
  X86::CondCode NewCC = X86::COND_INVALID;
  int64_t ImmDelta = 0;
  // Search backward from CmpInstr for an instruction whose flags make the
  // compare redundant.
  for (MachineInstr &Inst : make_range(std::next(CmpInstr.getReverseIterator()),
                                       CmpMBB.rend())) {
    if (&Inst == SrcRegDef) {
      if (IsCmpZero &&
          isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
        MI = &Inst;
        break;
      }
    }

    if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
      // If the defining instruction also encodes a usable condition (NEG,
      // LZCNT, ...), the TEST can be erased and later users retargeted.
      NewCC = isUseDefConvertible(Inst);
      if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
          Inst.getOperand(1).getReg() == SrcReg) {
        ShouldUpdateCC = true;
        MI = &Inst;
        break;
      }

      if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
                               Inst, &IsSwapped, &ImmDelta)) {
        Sub = &Inst;
        break;
      }
    }

    // MOV32r0 is implemented with XOR, which clobbers the condition code; it
    // is only safe to move up if its EFLAGS def is dead.
    if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
        Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
      Movr0Inst = &Inst;
      continue;
    }
  }
  // Scan forward across the uses of EFLAGS, rewriting condition codes where
  // needed and proving the flags do not live past a clobber.
  bool FlagsMayLiveOut = true;
  SmallVector<std::pair<MachineInstr *, X86::CondCode>, 4> OpsToUpdate;
  for (MachineInstr &Instr :
       make_range(std::next(CmpInstr.getIterator()), CmpMBB.end())) {
    bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
    bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
    if (!UseEFLAGS && ModifyEFLAGS) {
      // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
      FlagsMayLiveOut = false;
      break;
    }
    if (!UseEFLAGS && !ModifyEFLAGS)
      continue;

    X86::CondCode OldCC = X86::getCondFromMI(Instr);
    X86::CondCode ReplacementCC = OldCC;
    if (MI) {
      switch (OldCC) {
      case X86::COND_A:
      case X86::COND_AE:
      case X86::COND_B:
      case X86::COND_BE:
        // CF is used, we can't perform this optimization.
        return false;
      case X86::COND_G:
      case X86::COND_GE:
      case X86::COND_L:
      case X86::COND_LE:
        // If SF is used, but the instruction doesn't update the SF, then we
        // can't do the optimization.
        if (NoSignFlag)
          return false;
        [[fallthrough]];
      case X86::COND_O:
      case X86::COND_NO:
        // If OF is used, the instruction needs to clear it like a compare
        // against zero would.
        if (!ClearsOverflowFlag)
          return false;
        break;
      default:
        break;
      }
      // When the TEST is being replaced by a flag-producing def, retarget the
      // condition at that def's condition code.
      if (ShouldUpdateCC && NewCC != X86::COND_INVALID)
        ReplacementCC = NewCC;
    } else if (IsSwapped) {
      // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
      // be exchanged.
      ReplacementCC = X86::getSwappedCondition(OldCC);
      if (ReplacementCC == X86::COND_INVALID)
        return false;
      ShouldUpdateCC = true;
    } else if (ImmDelta != 0) {
      unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
      switch (OldCC) {
      case X86::COND_L: // x <s (C + 1)  -->  x <=s C
        if (ImmDelta != 1 || CmpValue == 0)
          return false;
        ReplacementCC = X86::COND_LE;
        break;
      case X86::COND_B: // x <u (C + 1)  -->  x <=u C
        if (ImmDelta != 1 || CmpValue == 0)
          return false;
        ReplacementCC = X86::COND_BE;
        break;
      default:
        return false;
      }
      (void)BitWidth;
      ShouldUpdateCC = true;
    }

    if (ShouldUpdateCC && ReplacementCC != OldCC) {
      // If it is safe to remove CmpInstr, the condition code of these
      // instructions will be modified.
      OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
    }
    if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
      // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
      FlagsMayLiveOut = false;
      break;
    }
  }
  // If the flags may still be live out of the block, the transformation must
  // be abandoned unless every successor proves otherwise.
  if ((MI != nullptr || ShouldUpdateCC) && FlagsMayLiveOut) {
    for (MachineBasicBlock *Successor : CmpMBB.successors())
      if (Successor->isLiveIn(X86::EFLAGS))
        return false;
  }

  // The instruction to be updated is either Sub or MI.
  assert((MI == nullptr || Sub == nullptr) &&
         "Should not have Sub and MI set");
  Sub = MI != nullptr ? MI : Sub;

  MachineBasicBlock *SubBB = Sub->getParent();
  // Move Movr0Inst to the appropriate place before Sub.
  if (Movr0Inst) {
    if (&CmpMBB != SubBB)
      return false;
    // Look backwards until we find a def that doesn't use the current EFLAGS.
    MachineBasicBlock::reverse_iterator InsertI = Sub,
                                        InsertE = Sub->getParent()->rend();
    for (; InsertI != InsertE; ++InsertI) {
      MachineInstr *Instr = &*InsertI;
      if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
          Instr->modifiesRegister(X86::EFLAGS, TRI)) {
        Movr0Inst->getParent()->remove(Movr0Inst);
        Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
                                   Movr0Inst);
        break;
      }
    }
    if (InsertI == InsertE)
      return false;
  }

  // Make sure Sub instruction defines EFLAGS and mark the def live.
  MachineOperand *FlagDef =
      Sub->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
  assert(FlagDef && "Unable to locate a def EFLAGS operand");
  FlagDef->setIsDead(false);

  CmpInstr.eraseFromParent();

  // Modify the condition code of the instructions in OpsToUpdate.
  for (auto &Op : OpsToUpdate) {
    Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
        .setImm(Op.second);
  }
  return true;
}
  // Check whether we can move DefMI here.
  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
  assert(DefMI);
  bool SawStore = false;
  if (!DefMI->isSafeToMove(nullptr, SawStore))
    return nullptr;

  // Collect information about virtual register operands of MI.
  SmallVector<unsigned, 1> SrcOperandIds;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg != FoldAsLoadDefReg)
      continue;
    // Do not fold if we have a subreg use or a def.
    if (MO.getSubReg() || MO.isDef())
      return nullptr;
    SrcOperandIds.push_back(i);
  }
  if (SrcOperandIds.empty())
    return nullptr;

  // Check whether we can fold the def into this use.
  if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
    FoldAsLoadDefReg = 0;
    return FoldMI;
  }

  return nullptr;
}
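// optimizeLoadInstr is the peephole driver's hook: when a register is defined
// by a plain load and used once, the load is folded into the user via
// foldMemoryOperand and the original load becomes dead.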
#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
  case X86::FROM##_ND:                                                         \
    return X86::TO##_ND;
#undef FROM_TO

#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
    return X86::TO;
    FROM_TO(CTEST64rr, CTEST64ri32)
#undef FROM_TO
bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI,
                                     Register Reg, int64_t ImmVal,
                                     MachineRegisterInfo *MRI,
                                     bool MakeChange) const {
  // Only handle GR registers for now.
  const TargetRegisterClass *RC = nullptr;
  if (Reg.isVirtual())
    RC = MRI->getRegClass(Reg);
  if ((Reg.isPhysical() && X86::GR64RegClass.contains(Reg)) ||
      (Reg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC))) {
    // Only 32-bit sign-extendable immediates fit 64-bit instructions.
    if (!isInt<32>(ImmVal))
      return false;
  }

  if (UseMI.findRegisterUseOperand(Reg, nullptr)->getSubReg())
    return false;
  // An immediate has a larger code size than a register. So avoid folding it
  // when it has more than one use and we are optimizing for size.
  if (UseMI.getMF()->getFunction().hasOptSize() && Reg.isVirtual() &&
      !MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI.getOpcode();
  unsigned NewOpc;
  if (Opc == TargetOpcode::COPY) {
    Register ToReg = UseMI.getOperand(0).getReg();
    const TargetRegisterClass *RC = nullptr;
    if (ToReg.isVirtual())
      RC = MRI->getRegClass(ToReg);
    bool GR32Reg = (ToReg.isVirtual() && X86::GR32RegClass.hasSubClassEq(RC)) ||
                   (ToReg.isPhysical() && X86::GR32RegClass.contains(ToReg));
    bool GR64Reg = (ToReg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC)) ||
                   (ToReg.isPhysical() && X86::GR64RegClass.contains(ToReg));
    bool GR8Reg = (ToReg.isVirtual() && X86::GR8RegClass.hasSubClassEq(RC)) ||
                  (ToReg.isPhysical() && X86::GR8RegClass.contains(ToReg));

    if (GR64Reg) {
      if (isUInt<32>(ImmVal))
        NewOpc = X86::MOV32ri64;
      else
        NewOpc = X86::MOV64ri;
    }
    else if (GR32Reg) {
      NewOpc = X86::MOV32ri;
      if (ImmVal == 0) {
        // MOV32r0 clobbers EFLAGS, so only use it when EFLAGS is provably
        // dead at this point.
        if (UseMI.getParent()->computeRegisterLiveness(
                &getRegisterInfo(), X86::EFLAGS, UseMI) ==
            MachineBasicBlock::LQR_Dead) {
          UseMI.setDesc(get(X86::MOV32r0));
          UseMI.removeOperand(
              UseMI.findRegisterUseOperandIdx(Reg, nullptr));
          return true;
        }
      }
    } else if (GR8Reg)
      NewOpc = X86::MOV8ri;
    else
      return false;
  } else
    NewOpc = ConvertALUrr2ALUri(Opc);

  if (!NewOpc)
    return false;

  // For SUB/SBB the immediate can only replace the second source operand.
  if ((NewOpc == X86::SUB64ri32 || NewOpc == X86::SUB32ri ||
       NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri ||
       NewOpc == X86::SUB64ri32_ND || NewOpc == X86::SUB32ri_ND ||
       NewOpc == X86::SBB64ri32_ND || NewOpc == X86::SBB32ri_ND) &&
      UseMI.findRegisterUseOperandIdx(Reg, nullptr) != 2)
    return false;
  // For CMP/CCMP the immediate can only replace the second operand.
  if (((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) ||
       (NewOpc == X86::CCMP64ri32 || NewOpc == X86::CCMP32ri)) &&
      UseMI.findRegisterUseOperandIdx(Reg, nullptr) != 1)
    return false;

  using namespace X86;
  if (isSHL(Opc) || isSHR(Opc) || isSAR(Opc) || isROL(Opc) || isROR(Opc) ||
      isRCL(Opc) || isRCR(Opc)) {
    unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg, nullptr);
    // Only the shift amount can become an immediate, and it must fit 8 bits.
    if (!isInt<8>(ImmVal))
      return false;
    if (!MakeChange)
      return true;
    UseMI.setDesc(get(NewOpc));
    UseMI.removeOperand(RegIdx);
    UseMI.addOperand(MachineOperand::CreateImm(ImmVal));
    return true;
  }

  // An arithmetic op with a zero immediate and a dead EFLAGS def degenerates
  // into a plain COPY.
  if (ImmVal == 0 && Opc != TargetOpcode::COPY &&
      UseMI.registerDefIsDead(X86::EFLAGS, nullptr)) {
    UseMI.setDesc(get(TargetOpcode::COPY));
    UseMI.removeOperand(
        UseMI.findRegisterUseOperandIdx(Reg, nullptr));
    UseMI.removeOperand(
        UseMI.findRegisterDefOperandIdx(X86::EFLAGS, nullptr));
    UseMI.untieRegOperand(0);
    return true;
  }

  unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
  unsigned ImmOpNum = 2;
  if (!UseMI.getOperand(0).isDef()) {
    Op1 = 0; // Comparison instructions have no defs.
    ImmOpNum = 1;
  }
  if (Opc == TargetOpcode::COPY)
    ImmOpNum = 1;

  // If the folded register is the first source, commute so the immediate
  // lands in the encodable slot.
  if (findCommutedOpIndices(UseMI, Op1, Op2) &&
      UseMI.getOperand(Op1).getReg() == Reg)
    commuteInstruction(UseMI);

  if (!MakeChange)
    return true;
  UseMI.setDesc(get(NewOpc));
  UseMI.getOperand(ImmOpNum).ChangeToImmediate(ImmVal);

  if (Reg.isVirtual() && MRI->use_nodbg_empty(Reg))
    DefMI->eraseFromBundle();

  return true;
}

bool X86InstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 Register Reg,
                                 MachineRegisterInfo *MRI) const {
  int64_t ImmVal;
  if (!getConstValDefinedInReg(DefMI, Reg, ImmVal))
    return false;
  return foldImmediateImpl(UseMI, &DefMI, Reg, ImmVal, MRI, true);
}
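// foldImmediate is a best-effort peephole: it never introduces new
// instructions, it only rewrites an existing use into its immediate form
// when the defining MOV's constant is known.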
  // convertToThreeAddress paths: both widening variants expect the canonical
  // two-address operand layout.
  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");

  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");

  // expandMOV32r1: lower the +1/-1 pseudo as XOR followed by INC or DEC.
  MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
                               const TargetInstrInfo &TII,
                               const X86Subtarget &Subtarget) {
  MachineBasicBlock &MBB = *MIB->getParent();
  const DebugLoc &DL = MIB->getDebugLoc();
  int64_t Imm = MIB->getOperand(1).getImm();
  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
  MachineBasicBlock::iterator I = MIB.getInstr();

  int StackAdjustment;

  if (Subtarget.is64Bit()) {
    assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
           MIB->getOpcode() == X86::MOV32ImmSExti8);
    // 64-bit mode has no 32-bit push/pop, so materialize the value with a
    // sign-extending 64-bit push/pop pair.
    StackAdjustment = 8;
    BuildMI(MBB, I, DL, TII.get(X86::PUSH64i32)).addImm(Imm);
    MIB->setDesc(TII.get(X86::POP64r));
  } else {
    StackAdjustment = 4;
    BuildMI(MBB, I, DL, TII.get(X86::PUSH32i)).addImm(Imm);
    MIB->setDesc(TII.get(X86::POP32r));
  }

  // Emit DWARF CFI for the implicit stack adjustment when frameless.
  MachineFunction &MF = *MBB.getParent();
  const X86FrameLowering *TFL = Subtarget.getFrameLowering();
  bool NeedsDwarfCFI = MF.needsFrameMoves();
  bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
  if (EmitCFI) {
    TFL->BuildCFI(
        MBB, I, DL,
        MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    TFL->BuildCFI(
        MBB, std::next(I), DL,
        MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
  }

  return true;
}

static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
  unsigned XorOp =
      MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
  MIB->setDesc(TII.get(XorOp));
  MIB.addReg(MIB.getReg(0), RegState::Undef);
  return true;
}
static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
                            const TargetRegisterInfo *TRI,
                            const MCInstrDesc &LoadDesc,
                            const MCInstrDesc &BroadcastDesc, unsigned SubIdx) {
  Register DestReg = MIB.getReg(0);
  // Check if DestReg is XMM16-31 or YMM16-31.
  if (TRI->getEncodingValue(DestReg) < 16) {
    // We can use a normal VEX encoded load.
    MIB->setDesc(LoadDesc);
  } else {
    // Use a 128/256-bit VBROADCAST instruction and change the destination to
    // the matching 512-bit register.
    MIB->setDesc(BroadcastDesc);
    DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
    MIB->getOperand(0).setReg(DestReg);
  }
  return true;
}

static bool expandNOVLXStore(MachineInstrBuilder &MIB,
                             const TargetRegisterInfo *TRI,
                             const MCInstrDesc &StoreDesc,
                             const MCInstrDesc &ExtractDesc, unsigned SubIdx) {
  Register SrcReg = MIB.getReg(X86::AddrNumOperands);
  // Check if SrcReg is XMM16-31 or YMM16-31.
  if (TRI->getEncodingValue(SrcReg) < 16) {
    // We can use a normal VEX encoded store.
    MIB->setDesc(StoreDesc);
  } else {
    // Use a VEXTRACT from the 512-bit super-register, extracting the low
    // part.
    MIB->setDesc(ExtractDesc);
    SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
    MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
    MIB.addImm(0x0); // Append an immediate to extract from the lower bits.
  }
  return true;
}
bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  bool HasAVX = Subtarget.hasAVX();
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
  switch (MI.getOpcode()) {
  case X86::MOV32r0:
    return Expand2AddrUndef(MIB, get(X86::XOR32rr));
  case X86::MOV32r1:
    return expandMOV32r1(MIB, *this, /*MinusOne=*/false);
  case X86::MOV32r_1:
    return expandMOV32r1(MIB, *this, /*MinusOne=*/true);
  case X86::MOV32ImmSExti8:
  case X86::MOV64ImmSExti8:
    return ExpandMOVImmSExti8(MIB, *this, Subtarget);
  case X86::SETB_C32r:
    return Expand2AddrUndef(MIB, get(X86::SBB32rr));
  case X86::SETB_C64r:
    return Expand2AddrUndef(MIB, get(X86::SBB64rr));
  case X86::V_SET0:
  case X86::FsFLD0SS:
  case X86::FsFLD0SD:
  case X86::FsFLD0F128:
    return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
  case X86::AVX_SET0: {
    assert(HasAVX && "AVX not supported");
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    Register SrcReg = MIB.getReg(0);
    Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
    MIB->getOperand(0).setReg(XReg);
    Expand2AddrUndef(MIB, get(X86::VXORPSrr));
    MIB.addReg(SrcReg, RegState::ImplicitDefine);
    return true;
  }
  case X86::AVX512_128_SET0:
  case X86::AVX512_FsFLD0SH:
  case X86::AVX512_FsFLD0SS:
  case X86::AVX512_FsFLD0SD:
  case X86::AVX512_FsFLD0F128: {
    bool HasVLX = Subtarget.hasVLX();
    Register SrcReg = MIB.getReg(0);
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
      return Expand2AddrUndef(MIB,
                              get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
    // Extended register without VLX: zero the full 512-bit register instead.
    SrcReg =
        TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
    MIB->getOperand(0).setReg(SrcReg);
    return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
  }
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0: {
    bool HasVLX = Subtarget.hasVLX();
    Register SrcReg = MIB.getReg(0);
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
      Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
      MIB->getOperand(0).setReg(XReg);
      Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
      MIB.addReg(SrcReg, RegState::ImplicitDefine);
      return true;
    }
    if (MI.getOpcode() == X86::AVX512_256_SET0) {
      // No VLX, so we must reference a zmm register.
      Register ZReg =
          TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
      MIB->getOperand(0).setReg(ZReg);
    }
    return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
  }
  case X86::V_SETALLONES:
    return Expand2AddrUndef(MIB,
                            get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
  case X86::AVX2_SETALLONES:
    return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
  case X86::AVX1_SETALLONES: {
    Register Reg = MIB.getReg(0);
    // VCMPPS with an immediate of 0xf produces an all-ones result (VCMPTRUE).
    MIB->setDesc(get(X86::VCMPPSYrri));
    MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
    return true;
  }
  case X86::AVX512_512_SETALLONES: {
    Register Reg = MIB.getReg(0);
    MIB->setDesc(get(X86::VPTERNLOGDZrri));
    // VPTERNLOGD needs 3 register inputs and an immediate;
    // 0xff will return 1s for any input.
    MIB.addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef)
        .addImm(0xff);
    return true;
  }
  case X86::AVX512_512_SEXT_MASK_32:
  case X86::AVX512_512_SEXT_MASK_64: {
    Register Reg = MIB.getReg(0);
    Register MaskReg = MIB.getReg(1);
    unsigned MaskState = getRegState(MIB->getOperand(1));
    unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64)
                       ? X86::VPTERNLOGQZrrikz
                       : X86::VPTERNLOGDZrrikz;
    MI.removeOperand(1);
    MIB->setDesc(get(Opc));
    // VPTERNLOG needs 3 register inputs and an immediate;
    // 0xff will return 1s for any input.
    MIB.addReg(Reg, RegState::Undef)
        .addReg(MaskReg, MaskState)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef)
        .addImm(0xff);
    return true;
  }
  case X86::VMOVAPSZ128rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
  case X86::VMOVUPSZ128rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
  case X86::VMOVAPSZ256rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
  case X86::VMOVUPSZ256rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
  case X86::VMOVAPSZ128mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
                            get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
  case X86::VMOVUPSZ128mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
                            get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
  case X86::VMOVAPSZ256mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
                            get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
  case X86::VMOVUPSZ256mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
                            get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
  case X86::MOV32ri64: {
    Register Reg = MIB.getReg(0);
    Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
    MI.setDesc(get(X86::MOV32ri));
    MIB->getOperand(0).setReg(Reg32);
    MIB.addReg(Reg, RegState::ImplicitDefine);
    return true;
  }
  case X86::RDFLAGS32:
  case X86::RDFLAGS64: {
    unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
    MachineBasicBlock &MBB = *MIB->getParent();

    BuildMI(MBB, MI, MIB->getDebugLoc(),
            get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32));

    // Permit reads of the EFLAGS and DF registers without them being defined.
    // This intrinsic exists to read external processor state in flags, such
    // as the trap flag, interrupt flag, and direction flag, none of which are
    // modeled by the backend.
    assert(MIB->getOperand(2).getReg() == X86::EFLAGS &&
           "Unexpected register in operand! Should be EFLAGS.");
    assert(MIB->getOperand(3).getReg() == X86::DF &&
           "Unexpected register in operand! Should be DF.");

    MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
    return true;
  }

  case X86::WRFLAGS32:
  case X86::WRFLAGS64: {
    unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
    MachineBasicBlock &MBB = *MIB->getParent();

    BuildMI(MBB, MI, MIB->getDebugLoc(),
            get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
        .addReg(MI.getOperand(0).getReg());
    BuildMI(MBB, MI, MIB->getDebugLoc(),
            get(Is64Bit ? X86::POPF64 : X86::POPF32));
    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD:
    expandLoadStackGuard(MIB, *this);
    return true;
  case X86::SHLDROT32ri:
    return expandSHXDROT(MIB, get(X86::SHLD32rri8));
  case X86::SHLDROT64ri:
    return expandSHXDROT(MIB, get(X86::SHLD64rri8));
  case X86::SHRDROT32ri:
    return expandSHXDROT(MIB, get(X86::SHRD32rri8));
  case X86::SHRDROT64ri:
    return expandSHXDROT(MIB, get(X86::SHRD64rri8));
  case X86::ADD8rr_DB:
    MIB->setDesc(get(X86::OR8rr));
    break;
  case X86::ADD16rr_DB:
    MIB->setDesc(get(X86::OR16rr));
    break;
  case X86::ADD32rr_DB:
    MIB->setDesc(get(X86::OR32rr));
    break;
  case X86::ADD64rr_DB:
    MIB->setDesc(get(X86::OR64rr));
    break;
  case X86::ADD8ri_DB:
    MIB->setDesc(get(X86::OR8ri));
    break;
  case X86::ADD16ri_DB:
    MIB->setDesc(get(X86::OR16ri));
    break;
  case X86::ADD32ri_DB:
    MIB->setDesc(get(X86::OR32ri));
    break;
  case X86::ADD64ri32_DB:
    MIB->setDesc(get(X86::OR64ri32));
    break;
  }
  return false;
}
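// The ADD*_DB pseudos above are "disjoint bits" adds: the operands are known
// to share no set bits, so the addition can be emitted as an equivalent OR.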
/// Return true for instructions that only update part of the destination
/// register, leaving the rest of its bits unmodified (a source of false
/// dependencies).
static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget,
                                bool ForLoadFold = false) {
  switch (Opcode) {
  case X86::CVTSI2SSrr:
  case X86::CVTSI2SSrm:
  case X86::CVTSI642SSrr:
  case X86::CVTSI642SSrm:
  case X86::CVTSI2SDrr:
  case X86::CVTSI2SDrm:
  case X86::CVTSI642SDrr:
  case X86::CVTSI642SDrm:
    // Load folding won't effect the undef register update since the input is
    // a GPR.
    return !ForLoadFold;
  case X86::CVTSD2SSrr:
  case X86::CVTSD2SSrm:
  case X86::CVTSS2SDrr:
  case X86::CVTSS2SDrm:
  case X86::RCPSSr_Int:
  case X86::RCPSSm_Int:
  case X86::ROUNDSDri:
  case X86::ROUNDSDmi:
  case X86::ROUNDSSri:
  case X86::ROUNDSSmi:
  case X86::RSQRTSSr_Int:
  case X86::RSQRTSSm_Int:
  case X86::SQRTSSr_Int:
  case X86::SQRTSSm_Int:
  case X86::SQRTSDr_Int:
  case X86::SQRTSDm_Int:
    return true;
  case X86::VFCMULCPHZ128rm:
  case X86::VFCMULCPHZ128rmb:
  case X86::VFCMULCPHZ128rmbkz:
  case X86::VFCMULCPHZ128rmkz:
  case X86::VFCMULCPHZ128rr:
  case X86::VFCMULCPHZ128rrkz:
  case X86::VFCMULCPHZ256rm:
  case X86::VFCMULCPHZ256rmb:
  case X86::VFCMULCPHZ256rmbkz:
  case X86::VFCMULCPHZ256rmkz:
  case X86::VFCMULCPHZ256rr:
  case X86::VFCMULCPHZ256rrkz:
  case X86::VFCMULCPHZrm:
  case X86::VFCMULCPHZrmb:
  case X86::VFCMULCPHZrmbkz:
  case X86::VFCMULCPHZrmkz:
  case X86::VFCMULCPHZrr:
  case X86::VFCMULCPHZrrb:
  case X86::VFCMULCPHZrrbkz:
  case X86::VFCMULCPHZrrkz:
  case X86::VFMULCPHZ128rm:
  case X86::VFMULCPHZ128rmb:
  case X86::VFMULCPHZ128rmbkz:
  case X86::VFMULCPHZ128rmkz:
  case X86::VFMULCPHZ128rr:
  case X86::VFMULCPHZ128rrkz:
  case X86::VFMULCPHZ256rm:
  case X86::VFMULCPHZ256rmb:
  case X86::VFMULCPHZ256rmbkz:
  case X86::VFMULCPHZ256rmkz:
  case X86::VFMULCPHZ256rr:
  case X86::VFMULCPHZ256rrkz:
  case X86::VFMULCPHZrm:
  case X86::VFMULCPHZrmb:
  case X86::VFMULCPHZrmbkz:
  case X86::VFMULCPHZrmkz:
  case X86::VFMULCPHZrr:
  case X86::VFMULCPHZrrb:
  case X86::VFMULCPHZrrbkz:
  case X86::VFMULCPHZrrkz:
  case X86::VFCMULCSHZrm:
  case X86::VFCMULCSHZrmkz:
  case X86::VFCMULCSHZrr:
  case X86::VFCMULCSHZrrb:
  case X86::VFCMULCSHZrrbkz:
  case X86::VFCMULCSHZrrkz:
  case X86::VFMULCSHZrm:
  case X86::VFMULCSHZrmkz:
  case X86::VFMULCSHZrr:
  case X86::VFMULCSHZrrb:
  case X86::VFMULCSHZrrbkz:
  case X86::VFMULCSHZrrkz:
    return Subtarget.hasMULCFalseDeps();
  case X86::VPERMDYrm:
  case X86::VPERMDYrr:
  case X86::VPERMQYmi:
  case X86::VPERMQYri:
  case X86::VPERMPSYrm:
  case X86::VPERMPSYrr:
  case X86::VPERMPDYmi:
  case X86::VPERMPDYri:
  case X86::VPERMDZ256rm:
  case X86::VPERMDZ256rmb:
  case X86::VPERMDZ256rmbkz:
  case X86::VPERMDZ256rmkz:
  case X86::VPERMDZ256rr:
  case X86::VPERMDZ256rrkz:
  case X86::VPERMDZrm:
  case X86::VPERMDZrmb:
  case X86::VPERMDZrmbkz:
  case X86::VPERMDZrmkz:
  case X86::VPERMDZrr:
  case X86::VPERMDZrrkz:
  case X86::VPERMQZ256mbi:
  case X86::VPERMQZ256mbikz:
  case X86::VPERMQZ256mi:
  case X86::VPERMQZ256mikz:
  case X86::VPERMQZ256ri:
  case X86::VPERMQZ256rikz:
  case X86::VPERMQZ256rm:
  case X86::VPERMQZ256rmb:
  case X86::VPERMQZ256rmbkz:
  case X86::VPERMQZ256rmkz:
  case X86::VPERMQZ256rr:
  case X86::VPERMQZ256rrkz:
  case X86::VPERMQZmbi:
  case X86::VPERMQZmbikz:
  case X86::VPERMQZmi:
  case X86::VPERMQZmikz:
  case X86::VPERMQZri:
  case X86::VPERMQZrikz:
  case X86::VPERMQZrm:
  case X86::VPERMQZrmb:
  case X86::VPERMQZrmbkz:
  case X86::VPERMQZrmkz:
  case X86::VPERMQZrr:
  case X86::VPERMQZrrkz:
  case X86::VPERMPSZ256rm:
  case X86::VPERMPSZ256rmb:
  case X86::VPERMPSZ256rmbkz:
  case X86::VPERMPSZ256rmkz:
  case X86::VPERMPSZ256rr:
  case X86::VPERMPSZ256rrkz:
  case X86::VPERMPSZrm:
  case X86::VPERMPSZrmb:
  case X86::VPERMPSZrmbkz:
  case X86::VPERMPSZrmkz:
  case X86::VPERMPSZrr:
  case X86::VPERMPSZrrkz:
  case X86::VPERMPDZ256mbi:
  case X86::VPERMPDZ256mbikz:
  case X86::VPERMPDZ256mi:
  case X86::VPERMPDZ256mikz:
  case X86::VPERMPDZ256ri:
  case X86::VPERMPDZ256rikz:
  case X86::VPERMPDZ256rm:
  case X86::VPERMPDZ256rmb:
  case X86::VPERMPDZ256rmbkz:
  case X86::VPERMPDZ256rmkz:
  case X86::VPERMPDZ256rr:
  case X86::VPERMPDZ256rrkz:
  case X86::VPERMPDZmbi:
  case X86::VPERMPDZmbikz:
  case X86::VPERMPDZmi:
  case X86::VPERMPDZmikz:
  case X86::VPERMPDZri:
  case X86::VPERMPDZrikz:
  case X86::VPERMPDZrm:
  case X86::VPERMPDZrmb:
  case X86::VPERMPDZrmbkz:
  case X86::VPERMPDZrmkz:
  case X86::VPERMPDZrr:
  case X86::VPERMPDZrrkz:
    return Subtarget.hasPERMFalseDeps();
  case X86::VRANGEPDZ128rmbi:
  case X86::VRANGEPDZ128rmbikz:
  case X86::VRANGEPDZ128rmi:
  case X86::VRANGEPDZ128rmikz:
  case X86::VRANGEPDZ128rri:
  case X86::VRANGEPDZ128rrikz:
  case X86::VRANGEPDZ256rmbi:
  case X86::VRANGEPDZ256rmbikz:
  case X86::VRANGEPDZ256rmi:
  case X86::VRANGEPDZ256rmikz:
  case X86::VRANGEPDZ256rri:
  case X86::VRANGEPDZ256rrikz:
  case X86::VRANGEPDZrmbi:
  case X86::VRANGEPDZrmbikz:
  case X86::VRANGEPDZrmi:
  case X86::VRANGEPDZrmikz:
  case X86::VRANGEPDZrri:
  case X86::VRANGEPDZrrib:
  case X86::VRANGEPDZrribkz:
  case X86::VRANGEPDZrrikz:
  case X86::VRANGEPSZ128rmbi:
  case X86::VRANGEPSZ128rmbikz:
  case X86::VRANGEPSZ128rmi:
  case X86::VRANGEPSZ128rmikz:
  case X86::VRANGEPSZ128rri:
  case X86::VRANGEPSZ128rrikz:
  case X86::VRANGEPSZ256rmbi:
  case X86::VRANGEPSZ256rmbikz:
  case X86::VRANGEPSZ256rmi:
  case X86::VRANGEPSZ256rmikz:
  case X86::VRANGEPSZ256rri:
  case X86::VRANGEPSZ256rrikz:
  case X86::VRANGEPSZrmbi:
  case X86::VRANGEPSZrmbikz:
  case X86::VRANGEPSZrmi:
  case X86::VRANGEPSZrmikz:
  case X86::VRANGEPSZrri:
  case X86::VRANGEPSZrrib:
  case X86::VRANGEPSZrribkz:
  case X86::VRANGEPSZrrikz:
  case X86::VRANGESDZrmi:
  case X86::VRANGESDZrmikz:
  case X86::VRANGESDZrri:
  case X86::VRANGESDZrrib:
  case X86::VRANGESDZrribkz:
  case X86::VRANGESDZrrikz:
  case X86::VRANGESSZrmi:
  case X86::VRANGESSZrmikz:
  case X86::VRANGESSZrri:
  case X86::VRANGESSZrrib:
  case X86::VRANGESSZrribkz:
  case X86::VRANGESSZrrikz:
    return Subtarget.hasRANGEFalseDeps();
  case X86::VGETMANTSSZrmi:
  case X86::VGETMANTSSZrmikz:
  case X86::VGETMANTSSZrri:
  case X86::VGETMANTSSZrrib:
  case X86::VGETMANTSSZrribkz:
  case X86::VGETMANTSSZrrikz:
  case X86::VGETMANTSDZrmi:
  case X86::VGETMANTSDZrmikz:
  case X86::VGETMANTSDZrri:
  case X86::VGETMANTSDZrrib:
  case X86::VGETMANTSDZrribkz:
  case X86::VGETMANTSDZrrikz:
  case X86::VGETMANTSHZrmi:
  case X86::VGETMANTSHZrmikz:
  case X86::VGETMANTSHZrri:
  case X86::VGETMANTSHZrrib:
  case X86::VGETMANTSHZrribkz:
  case X86::VGETMANTSHZrrikz:
  case X86::VGETMANTPSZ128rmbi:
  case X86::VGETMANTPSZ128rmbikz:
  case X86::VGETMANTPSZ128rmi:
  case X86::VGETMANTPSZ128rmikz:
  case X86::VGETMANTPSZ256rmbi:
  case X86::VGETMANTPSZ256rmbikz:
  case X86::VGETMANTPSZ256rmi:
  case X86::VGETMANTPSZ256rmikz:
  case X86::VGETMANTPSZrmbi:
  case X86::VGETMANTPSZrmbikz:
  case X86::VGETMANTPSZrmi:
  case X86::VGETMANTPSZrmikz:
  case X86::VGETMANTPDZ128rmbi:
  case X86::VGETMANTPDZ128rmbikz:
  case X86::VGETMANTPDZ128rmi:
  case X86::VGETMANTPDZ128rmikz:
  case X86::VGETMANTPDZ256rmbi:
  case X86::VGETMANTPDZ256rmbikz:
  case X86::VGETMANTPDZ256rmi:
  case X86::VGETMANTPDZ256rmikz:
  case X86::VGETMANTPDZrmbi:
  case X86::VGETMANTPDZrmbikz:
  case X86::VGETMANTPDZrmi:
  case X86::VGETMANTPDZrmikz:
    return Subtarget.hasGETMANTFalseDeps();
  case X86::VPMULLQZ128rm:
  case X86::VPMULLQZ128rmb:
  case X86::VPMULLQZ128rmbkz:
  case X86::VPMULLQZ128rmkz:
  case X86::VPMULLQZ128rr:
  case X86::VPMULLQZ128rrkz:
  case X86::VPMULLQZ256rm:
  case X86::VPMULLQZ256rmb:
  case X86::VPMULLQZ256rmbkz:
  case X86::VPMULLQZ256rmkz:
  case X86::VPMULLQZ256rr:
  case X86::VPMULLQZ256rrkz:
  case X86::VPMULLQZrm:
  case X86::VPMULLQZrmb:
  case X86::VPMULLQZrmbkz:
  case X86::VPMULLQZrmkz:
  case X86::VPMULLQZrr:
  case X86::VPMULLQZrrkz:
    return Subtarget.hasMULLQFalseDeps();
  case X86::POPCNT32rm:
  case X86::POPCNT32rr:
  case X86::POPCNT64rm:
  case X86::POPCNT64rr:
    return Subtarget.hasPOPCNTFalseDeps();
  case X86::LZCNT32rm:
  case X86::LZCNT32rr:
  case X86::LZCNT64rm:
  case X86::LZCNT64rr:
  case X86::TZCNT32rm:
  case X86::TZCNT32rr:
  case X86::TZCNT64rm:
  case X86::TZCNT64rr:
    return Subtarget.hasLZCNTFalseDeps();
  }

  return false;
}
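// Each hasXXXFalseDeps() predicate above is a per-CPU tuning flag marking
// instructions whose destination carries a false output dependency on some
// microarchitectures; the dependency-breaking pass then zeroes the
// destination first.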
  // If MI is marked as reading Reg, the partial register update is wanted.
  const MachineOperand &MO = MI.getOperand(0);
  Register Reg = MO.getReg();
  if (Reg.isVirtual()) {
    if (MO.readsReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else {
    if (MI.readsRegister(Reg, TRI))
      return 0;
  }

  // If any instruction in the clearance range reads Reg, insert a dependency
  // breaking instruction; it is cheap and likely to be hidden in other
  // instructions' cycles.
  return PartialRegUpdateClearance;
}
/// Like hasPartialRegUpdate, but for instructions with an undef read of a
/// source operand that creates the same kind of false dependency.
static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
                              bool ForLoadFold = false) {
  switch (Opcode) {
  case X86::MMX_PUNPCKHBWrr:
  case X86::MMX_PUNPCKHWDrr:
  case X86::MMX_PUNPCKHDQrr:
  case X86::MMX_PUNPCKLBWrr:
  case X86::MMX_PUNPCKLWDrr:
  case X86::MMX_PUNPCKLDQrr:
  case X86::MOVHLPSrr:
  case X86::PACKSSWBrr:
  case X86::PACKUSWBrr:
  case X86::PACKSSDWrr:
  case X86::PACKUSDWrr:
  case X86::PUNPCKHBWrr:
  case X86::PUNPCKLBWrr:
  case X86::PUNPCKHWDrr:
  case X86::PUNPCKLWDrr:
  case X86::PUNPCKHDQrr:
  case X86::PUNPCKLDQrr:
  case X86::PUNPCKHQDQrr:
  case X86::PUNPCKLQDQrr:
  case X86::SHUFPDrri:
  case X86::SHUFPSrri:
    // These instructions are sometimes used with an undef second source;
    // operand 1 is tied, so they can't be folded.
    return OpNum == 2 && !ForLoadFold;
  case X86::VMOVLHPSrr:
  case X86::VMOVLHPSZrr:
  case X86::VPACKSSWBrr:
  case X86::VPACKUSWBrr:
  case X86::VPACKSSDWrr:
  case X86::VPACKUSDWrr:
  case X86::VPACKSSWBZ128rr:
  case X86::VPACKUSWBZ128rr:
  case X86::VPACKSSDWZ128rr:
  case X86::VPACKUSDWZ128rr:
  case X86::VPERM2F128rr:
  case X86::VPERM2I128rr:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF32X4Zrri:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFF64X2Zrri:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI32X4Zrri:
  case X86::VSHUFI64X2Z256rri:
  case X86::VSHUFI64X2Zrri:
  case X86::VPUNPCKHBWrr:
  case X86::VPUNPCKLBWrr:
  case X86::VPUNPCKHBWYrr:
  case X86::VPUNPCKLBWYrr:
  case X86::VPUNPCKHBWZ128rr:
  case X86::VPUNPCKLBWZ128rr:
  case X86::VPUNPCKHBWZ256rr:
  case X86::VPUNPCKLBWZ256rr:
  case X86::VPUNPCKHBWZrr:
  case X86::VPUNPCKLBWZrr:
  case X86::VPUNPCKHWDrr:
  case X86::VPUNPCKLWDrr:
  case X86::VPUNPCKHWDYrr:
  case X86::VPUNPCKLWDYrr:
  case X86::VPUNPCKHWDZ128rr:
  case X86::VPUNPCKLWDZ128rr:
  case X86::VPUNPCKHWDZ256rr:
  case X86::VPUNPCKLWDZ256rr:
  case X86::VPUNPCKHWDZrr:
  case X86::VPUNPCKLWDZrr:
  case X86::VPUNPCKHDQrr:
  case X86::VPUNPCKLDQrr:
  case X86::VPUNPCKHDQYrr:
  case X86::VPUNPCKLDQYrr:
  case X86::VPUNPCKHDQZ128rr:
  case X86::VPUNPCKLDQZ128rr:
  case X86::VPUNPCKHDQZ256rr:
  case X86::VPUNPCKLDQZ256rr:
  case X86::VPUNPCKHDQZrr:
  case X86::VPUNPCKLDQZrr:
  case X86::VPUNPCKHQDQrr:
  case X86::VPUNPCKLQDQrr:
  case X86::VPUNPCKHQDQYrr:
  case X86::VPUNPCKLQDQYrr:
  case X86::VPUNPCKHQDQZ128rr:
  case X86::VPUNPCKLQDQZ128rr:
  case X86::VPUNPCKHQDQZ256rr:
  case X86::VPUNPCKLQDQZ256rr:
  case X86::VPUNPCKHQDQZrr:
  case X86::VPUNPCKLQDQZrr:
    // In the VEX/EVEX forms both sources may be undef-read.
    return (OpNum == 1 || OpNum == 2) && !ForLoadFold;
  case X86::VCVTSI2SSrr:
  case X86::VCVTSI2SSrm:
  case X86::VCVTSI2SSrr_Int:
  case X86::VCVTSI2SSrm_Int:
  case X86::VCVTSI642SSrr:
  case X86::VCVTSI642SSrm:
  case X86::VCVTSI642SSrr_Int:
  case X86::VCVTSI642SSrm_Int:
  case X86::VCVTSI2SDrr:
  case X86::VCVTSI2SDrm:
  case X86::VCVTSI2SDrr_Int:
  case X86::VCVTSI2SDrm_Int:
  case X86::VCVTSI642SDrr:
  case X86::VCVTSI642SDrm:
  case X86::VCVTSI642SDrr_Int:
  case X86::VCVTSI642SDrm_Int:
  // AVX-512
  case X86::VCVTSI2SSZrr:
  case X86::VCVTSI2SSZrm:
  case X86::VCVTSI2SSZrr_Int:
  case X86::VCVTSI2SSZrrb_Int:
  case X86::VCVTSI2SSZrm_Int:
  case X86::VCVTSI642SSZrr:
  case X86::VCVTSI642SSZrm:
  case X86::VCVTSI642SSZrr_Int:
  case X86::VCVTSI642SSZrrb_Int:
  case X86::VCVTSI642SSZrm_Int:
  case X86::VCVTSI2SDZrr:
  case X86::VCVTSI2SDZrm:
  case X86::VCVTSI2SDZrr_Int:
  case X86::VCVTSI2SDZrm_Int:
  case X86::VCVTSI642SDZrr:
  case X86::VCVTSI642SDZrm:
  case X86::VCVTSI642SDZrr_Int:
  case X86::VCVTSI642SDZrrb_Int:
  case X86::VCVTSI642SDZrm_Int:
  case X86::VCVTUSI2SSZrr:
  case X86::VCVTUSI2SSZrm:
  case X86::VCVTUSI2SSZrr_Int:
  case X86::VCVTUSI2SSZrrb_Int:
  case X86::VCVTUSI2SSZrm_Int:
  case X86::VCVTUSI642SSZrr:
  case X86::VCVTUSI642SSZrm:
  case X86::VCVTUSI642SSZrr_Int:
  case X86::VCVTUSI642SSZrrb_Int:
  case X86::VCVTUSI642SSZrm_Int:
  case X86::VCVTUSI2SDZrr:
  case X86::VCVTUSI2SDZrm:
  case X86::VCVTUSI2SDZrr_Int:
  case X86::VCVTUSI2SDZrm_Int:
  case X86::VCVTUSI642SDZrr:
  case X86::VCVTUSI642SDZrm:
  case X86::VCVTUSI642SDZrr_Int:
  case X86::VCVTUSI642SDZrrb_Int:
  case X86::VCVTUSI642SDZrm_Int:
  case X86::VCVTSI2SHZrr:
  case X86::VCVTSI2SHZrm:
  case X86::VCVTSI2SHZrr_Int:
  case X86::VCVTSI2SHZrrb_Int:
  case X86::VCVTSI2SHZrm_Int:
  case X86::VCVTSI642SHZrr:
  case X86::VCVTSI642SHZrm:
  case X86::VCVTSI642SHZrr_Int:
  case X86::VCVTSI642SHZrrb_Int:
  case X86::VCVTSI642SHZrm_Int:
  case X86::VCVTUSI2SHZrr:
  case X86::VCVTUSI2SHZrm:
  case X86::VCVTUSI2SHZrr_Int:
  case X86::VCVTUSI2SHZrrb_Int:
  case X86::VCVTUSI2SHZrm_Int:
  case X86::VCVTUSI642SHZrr:
  case X86::VCVTUSI642SHZrm:
  case X86::VCVTUSI642SHZrr_Int:
  case X86::VCVTUSI642SHZrrb_Int:
  case X86::VCVTUSI642SHZrm_Int:
    // Load folding won't effect the undef register update since the input is
    // a GPR.
    return OpNum == 1 && !ForLoadFold;
  case X86::VCVTSD2SSrr:
  case X86::VCVTSD2SSrm:
  case X86::VCVTSD2SSrr_Int:
  case X86::VCVTSD2SSrm_Int:
  case X86::VCVTSS2SDrr:
  case X86::VCVTSS2SDrm:
  case X86::VCVTSS2SDrr_Int:
  case X86::VCVTSS2SDrm_Int:
  case X86::VRCPSSr:
  case X86::VRCPSSr_Int:
  case X86::VRCPSSm:
  case X86::VRCPSSm_Int:
  case X86::VROUNDSDri:
  case X86::VROUNDSDmi:
  case X86::VROUNDSDri_Int:
  case X86::VROUNDSDmi_Int:
  case X86::VROUNDSSri:
  case X86::VROUNDSSmi:
  case X86::VROUNDSSri_Int:
  case X86::VROUNDSSmi_Int:
  case X86::VRSQRTSSr:
  case X86::VRSQRTSSr_Int:
  case X86::VRSQRTSSm:
  case X86::VRSQRTSSm_Int:
  case X86::VSQRTSSr:
  case X86::VSQRTSSr_Int:
  case X86::VSQRTSSm:
  case X86::VSQRTSSm_Int:
  case X86::VSQRTSDr:
  case X86::VSQRTSDr_Int:
  case X86::VSQRTSDm:
  case X86::VSQRTSDm_Int:
  // AVX-512
  case X86::VCVTSD2SSZrr:
  case X86::VCVTSD2SSZrr_Int:
  case X86::VCVTSD2SSZrrb_Int:
  case X86::VCVTSD2SSZrm:
  case X86::VCVTSD2SSZrm_Int:
  case X86::VCVTSS2SDZrr:
  case X86::VCVTSS2SDZrr_Int:
  case X86::VCVTSS2SDZrrb_Int:
  case X86::VCVTSS2SDZrm:
  case X86::VCVTSS2SDZrm_Int:
  case X86::VGETEXPSDZr:
  case X86::VGETEXPSDZrb:
  case X86::VGETEXPSDZm:
  case X86::VGETEXPSSZr:
  case X86::VGETEXPSSZrb:
  case X86::VGETEXPSSZm:
  case X86::VGETMANTSDZrri:
  case X86::VGETMANTSDZrrib:
  case X86::VGETMANTSDZrmi:
  case X86::VGETMANTSSZrri:
  case X86::VGETMANTSSZrrib:
  case X86::VGETMANTSSZrmi:
  case X86::VRNDSCALESDZr:
  case X86::VRNDSCALESDZr_Int:
  case X86::VRNDSCALESDZrb_Int:
  case X86::VRNDSCALESDZm:
  case X86::VRNDSCALESDZm_Int:
  case X86::VRNDSCALESSZr:
  case X86::VRNDSCALESSZr_Int:
  case X86::VRNDSCALESSZrb_Int:
  case X86::VRNDSCALESSZm:
  case X86::VRNDSCALESSZm_Int:
  case X86::VRCP14SDZrr:
  case X86::VRCP14SDZrm:
  case X86::VRCP14SSZrr:
  case X86::VRCP14SSZrm:
  case X86::VRCPSHZrr:
  case X86::VRCPSHZrm:
  case X86::VRSQRTSHZrr:
  case X86::VRSQRTSHZrm:
  case X86::VREDUCESHZrmi:
  case X86::VREDUCESHZrri:
  case X86::VREDUCESHZrrib:
  case X86::VGETEXPSHZr:
  case X86::VGETEXPSHZrb:
  case X86::VGETEXPSHZm:
  case X86::VGETMANTSHZrri:
  case X86::VGETMANTSHZrrib:
  case X86::VGETMANTSHZrmi:
  case X86::VRNDSCALESHZr:
  case X86::VRNDSCALESHZr_Int:
  case X86::VRNDSCALESHZrb_Int:
  case X86::VRNDSCALESHZm:
  case X86::VRNDSCALESHZm_Int:
  case X86::VSQRTSHZr:
  case X86::VSQRTSHZr_Int:
  case X86::VSQRTSHZrb_Int:
  case X86::VSQRTSHZm:
  case X86::VSQRTSHZm_Int:
  case X86::VRCP28SDZr:
  case X86::VRCP28SDZrb:
  case X86::VRCP28SDZm:
  case X86::VRCP28SSZr:
  case X86::VRCP28SSZrb:
  case X86::VRCP28SSZm:
  case X86::VREDUCESSZrmi:
  case X86::VREDUCESSZrri:
  case X86::VREDUCESSZrrib:
  case X86::VRSQRT14SDZrr:
  case X86::VRSQRT14SDZrm:
  case X86::VRSQRT14SSZrr:
  case X86::VRSQRT14SSZrm:
  case X86::VRSQRT28SDZr:
  case X86::VRSQRT28SDZrb:
  case X86::VRSQRT28SDZm:
  case X86::VRSQRT28SSZr:
  case X86::VRSQRT28SSZrb:
  case X86::VRSQRT28SSZm:
  case X86::VSQRTSSZr:
  case X86::VSQRTSSZr_Int:
  case X86::VSQRTSSZrb_Int:
  case X86::VSQRTSSZm:
  case X86::VSQRTSSZm_Int:
  case X86::VSQRTSDZr:
  case X86::VSQRTSDZr_Int:
  case X86::VSQRTSDZrb_Int:
  case X86::VSQRTSDZm:
  case X86::VSQRTSDZm_Int:
  case X86::VCVTSD2SHZrr:
  case X86::VCVTSD2SHZrr_Int:
  case X86::VCVTSD2SHZrrb_Int:
  case X86::VCVTSD2SHZrm:
  case X86::VCVTSD2SHZrm_Int:
  case X86::VCVTSS2SHZrr:
  case X86::VCVTSS2SHZrr_Int:
  case X86::VCVTSS2SHZrrb_Int:
  case X86::VCVTSS2SHZrm:
  case X86::VCVTSS2SHZrm_Int:
  case X86::VCVTSH2SDZrr:
  case X86::VCVTSH2SDZrr_Int:
  case X86::VCVTSH2SDZrrb_Int:
  case X86::VCVTSH2SDZrm:
  case X86::VCVTSH2SDZrm_Int:
  case X86::VCVTSH2SSZrr:
  case X86::VCVTSH2SSZrr_Int:
  case X86::VCVTSH2SSZrrb_Int:
  case X86::VCVTSH2SSZrm:
  case X86::VCVTSH2SSZrm_Int:
    return OpNum == 1 && !ForLoadFold;
  case X86::VMOVSSZrrk:
  case X86::VMOVSDZrrk:
    return OpNum == 3 && !ForLoadFold;
  case X86::VMOVSSZrrkz:
  case X86::VMOVSDZrrkz:
    return OpNum == 2 && !ForLoadFold;
  }
  return false;
}
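// The OpNum checks above identify which source operand may be read undef;
// BreakFalseDeps uses that to either pick a better register for the operand
// or insert a dependency-breaking zeroing idiom before the instruction.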
void X86InstrInfo::breakPartialRegDependency(
    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
  Register Reg = MI.getOperand(OpNum).getReg();
  // If MI kills this register, the false dependence is already broken.
  if (MI.killsRegister(Reg, TRI))
    return;

  if (X86::VR128RegClass.contains(Reg)) {
    // These instructions are all floating point domain, so xorps is the best
    // choice.
    unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::VR256RegClass.contains(Reg)) {
    // Use vxorps to clear the full ymm register.
    // It wants to read and write the xmm sub-register.
    Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::VR128XRegClass.contains(Reg)) {
    // Only handle VLX targets.
    if (!Subtarget.hasVLX())
      return;
    // Since vxorps requires AVX512DQ we can use vpxord instead.
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::VR256XRegClass.contains(Reg) ||
             X86::VR512RegClass.contains(Reg)) {
    // Only handle VLX targets.
    if (!Subtarget.hasVLX())
      return;
    // Use vpxord to clear the full ymm/zmm register.
    // It wants to read and write the xmm sub-register.
    Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr),
            XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::GR64RegClass.contains(Reg)) {
    // Use XOR32rr: it has a shorter encoding and zeros the upper bits too.
    Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::GR32RegClass.contains(Reg)) {
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  }
}
static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
                        int PtrOffset = 0) {
  unsigned NumAddrOps = MOs.size();

  if (NumAddrOps < 4) {
    // FrameIndex only - add an immediate offset (whether it is zero or not).
    for (unsigned i = 0; i != NumAddrOps; ++i)
      MIB.add(MOs[i]);
    addOffset(MIB, PtrOffset);
  } else {
    // General memory addressing - we need to add any offset to an existing
    // offset.
    assert(MOs.size() == 5 && "Unexpected memory operand list length");
    for (unsigned i = 0; i != NumAddrOps; ++i) {
      const MachineOperand &MO = MOs[i];
      if (i == 3 && PtrOffset != 0) {
        MIB.addDisp(MO, PtrOffset);
      } else {
        MIB.add(MO);
      }
    }
  }
}
static void updateOperandRegConstraints(MachineFunction &MF,
                                        MachineInstr &NewMI,
                                        const TargetInstrInfo &TII) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
    MachineOperand &MO = NewMI.getOperand(Idx);
    // We only need to update constraints on virtual register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;

    auto *NewRC = MRI.constrainRegClass(
        Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
    if (!NewRC) {
      LLVM_DEBUG(
          dbgs()
              << "WARNING: Unable to update register constraint for operand "
              << Idx << " of instruction:\n";
          NewMI.dump(); dbgs() << "\n");
    }
  }
}

  // fuseTwoAddrInst: copy the remaining operands after the fused memory
  // reference, converting them over.
  unsigned NumOps = MI.getDesc().getNumOperands() - 2;
  for (unsigned i = 0; i != NumOps; ++i) {
    MachineOperand &MO = MI.getOperand(i + 2);
    MIB.add(MO);
  }

  // fuseInst: substitute the memory reference for operand OpNo, copying the
  // rest of the operands unchanged.
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (i == OpNo) {
      assert(MO.isReg() && "Expected to fold into reg operand!");
      addOperands(MIB, MOs, PtrOffset);
    } else {
      MIB.add(MO);
    }
  }

  // makeM0Inst: build the replacement instruction at the insertion point.
  MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
                                    MI.getDebugLoc(), TII.get(Opcode));
MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
    MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
    ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
    unsigned Size, Align Alignment) const {
  switch (MI.getOpcode()) {
  case X86::INSERTPSrr:
  case X86::VINSERTPSrr:
  case X86::VINSERTPSZrr:
    // Attempt to convert the load of the inserted vector into a fold load of
    // a single float.
    if (OpNum == 2) {
      unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
      unsigned ZMask = Imm & 15;
      unsigned DstIdx = (Imm >> 4) & 3;
      unsigned SrcIdx = (Imm >> 6) & 3;

      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass *RC =
          getRegClass(MI.getDesc(), OpNum, &TRI, MF);
      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      if ((Size == 0 || Size >= 16) && RCSize >= 16 &&
          (MI.getOpcode() != X86::INSERTPSrr || Alignment >= Align(4))) {
        int PtrOffset = SrcIdx * 4;
        unsigned NewImm = (DstIdx << 4) | ZMask;
        unsigned NewOpCode =
            (MI.getOpcode() == X86::VINSERTPSZrr)  ? X86::VINSERTPSZrm
            : (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm
                                                   : X86::INSERTPSrm;
        MachineInstr *NewMI =
            fuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this,
                     PtrOffset);
        NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
        return NewMI;
      }
    }
    break;
  case X86::MOVHLPSrr:
  case X86::VMOVHLPSrr:
  case X86::VMOVHLPSZrr:
    // Move the upper 64 bits of the second operand to the lower 64 bits; to
    // fold the load, adjust the pointer to the upper half and use (V)MOVLPS.
    if (OpNum == 2) {
      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass *RC =
          getRegClass(MI.getDesc(), OpNum, &TRI, MF);
      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
        unsigned NewOpCode =
            (MI.getOpcode() == X86::VMOVHLPSZrr)  ? X86::VMOVLPSZ128rm
            : (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm
                                                  : X86::MOVLPSrm;
        return fuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
      }
    }
    break;
  case X86::UNPCKLPDrr:
    // If we won't be able to fold this to the memory form of UNPCKL, use
    // MOVHPD instead. Done as custom because we can't have this in the load
    // folding table.
    if (OpNum == 2) {
      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass *RC =
          getRegClass(MI.getDesc(), OpNum, &TRI, MF);
      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16))
        return fuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
    }
    break;
  case X86::MOV32r0:
    if (auto *NewMI =
            makeM0Inst(*this, (Size == 4) ? X86::MOV32mi : X86::MOV64mi32, MOs,
                       InsertPt, MI))
      return NewMI;
    break;
  }

  return nullptr;
}

static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
                                               MachineInstr &MI) {
  if (MF.getFunction().hasOptSize() || !hasUndefRegUpdate(MI.getOpcode(), 1) ||
      !MI.getOperand(1).isReg())
    return false;

  // Folding the load would hide the undef read from the post-RA dependency
  // breaker, so keep the register form when the input is already undef.
  if (MI.getOperand(1).isUndef())
    return true;

  return false;
}
unsigned X86InstrInfo::commuteOperandsForFold(MachineInstr &MI,
                                              unsigned Idx1) const {
  unsigned Idx2 = CommuteAnyOperandIndex;
  if (!findCommutedOpIndices(MI, Idx1, Idx2))
    return Idx1;

  bool HasDef = MI.getDesc().getNumDefs();
  Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
  Register Reg1 = MI.getOperand(Idx1).getReg();
  Register Reg2 = MI.getOperand(Idx2).getReg();
  bool Tied1 = 0 == MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO);
  bool Tied2 = 0 == MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO);

  // If either of the commutable operands is tied to the destination, then we
  // can not commute + fold.
  if ((HasDef && Reg0 == Reg1 && Tied1) || (HasDef && Reg0 == Reg2 && Tied2))
    return Idx1;

  return commuteInstruction(MI, false, Idx1, Idx2) ? Idx2 : Idx1;
}
static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx) {
  if (PrintFailedFusing && !MI.isCopy())
    dbgs() << "We failed to fuse operand " << Idx << " in " << MI;
}
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
    ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
    unsigned Size, Align Alignment, bool AllowCommute) const {
  bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
  unsigned Opc = MI.getOpcode();

  // For CPUs that favor the register form of a call or push, do not fold
  // loads into calls or pushes, unless optimizing for size aggressively.
  if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() &&
      (Opc == X86::CALL32r || Opc == X86::CALL64r || Opc == X86::PUSH16r ||
       Opc == X86::PUSH32r || Opc == X86::PUSH64r))
    return nullptr;

  // Avoid partial and undef register update stalls unless optimizing for
  // size.
  if (!MF.getFunction().hasOptSize() &&
      (hasPartialRegUpdate(Opc, Subtarget, /*ForLoadFold*/ true) ||
       shouldPreventUndefRegUpdateMemFold(MF, MI)))
    return nullptr;

  unsigned NumOps = MI.getDesc().getNumOperands();
  bool IsTwoAddr = NumOps > 1 && OpNum < 2 && MI.getOperand(0).isReg() &&
                   MI.getOperand(1).isReg() &&
                   MI.getOperand(0).getReg() == MI.getOperand(1).getReg();

  // FIXME: AsmPrinter doesn't know how to handle
  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
  if (Opc == X86::ADD32ri &&
      MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
    return nullptr;

  // GOTTPOFF relocation loads can only be folded into add instructions.
  if (MOs.size() == X86::AddrNumOperands &&
      MOs[X86::AddrDisp].getTargetFlags() == X86II::MO_GOTTPOFF &&
      Opc != X86::ADD64rr)
    return nullptr;

  // Don't fold loads into indirect calls that need a KCFI check; they would
  // have to be unfolded again later.
  if (MI.isCall() && MI.getCFIType())
    return nullptr;

  // Attempt to fold any custom cases we have.
  if (auto *CustomMI = foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt,
                                               Size, Alignment))
    return CustomMI;

  // Folding into the two-address part of an instruction requires replacing
  // *both* tied registers with the memory location, so it uses a different
  // table.
  const X86FoldTableEntry *I =
      IsTwoAddr ? lookupTwoAddrFoldTable(Opc) : lookupFoldTable(Opc, OpNum);
  if (I != nullptr) {
    unsigned Opcode = I->DstOp;
    bool NarrowToMOV32rm = false;
    if (Size) {
      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass *RC =
          getRegClass(MI.getDesc(), OpNum, &TRI, MF);
      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
      if (Size < RCSize) {
        // Check if it's safe to fold the load. If the size of the object is
        // narrower than the load width, then it's not.
        if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
          return nullptr;
        // If this is a 64-bit load from a 32-bit slot, we can do a 32-bit
        // load which is implicitly zero-extended. This likely is due to live
        // interval analysis remat'ing a load from a stack slot.
        if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
          return nullptr;
        Opcode = X86::MOV32rm;
        NarrowToMOV32rm = true;
      }
    }

    MachineInstr *NewMI =
        IsTwoAddr ? fuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this)
                  : fuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);

    if (NarrowToMOV32rm) {
      // Change the destination register to its 32-bit sub-register; the
      // 32-bit load implicitly zeroes the upper bits.
      Register DstReg = NewMI->getOperand(0).getReg();
      if (DstReg.isPhysical())
        NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
      else
        NewMI->getOperand(0).setSubReg(X86::sub_32bit);
    }
    return NewMI;
  }

  if (AllowCommute) {
    // If the instruction and target operand are commutable, commute the
    // instruction and try again.
    unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
    if (CommuteOpIdx2 == OpNum) {
      printFailMsgforFold(MI, OpNum);
      return nullptr;
    }
    if (MachineInstr *NewMI =
            foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, Size,
                                  Alignment, /*AllowCommute=*/false))
      return NewMI;
    // Folding failed again - undo the commute before returning.
    commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
  }
  // Avoid folds of operands that carry sub-register indices.
  for (auto Op : Ops) {
    unsigned SubReg = MI.getOperand(Op).getSubReg();
    // MOV32r0 is special because it is also used to clear a 64-bit register
    // via SUBREG_TO_REG (see the MOV32r0 patterns in the TD files).
    if (MI.getOpcode() == X86::MOV32r0 && SubReg == X86::sub_32bit)
      continue;
    if (SubReg)
      return nullptr;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Size = MFI.getObjectSize(FrameIndex);
  Align Alignment = MFI.getObjectAlign(FrameIndex);
  // If the function stack isn't realigned, we don't want to fold instructions
  // that need increased alignment.
  if (!RI.hasStackRealignment(MF))
    Alignment = std::min(Alignment,
                         Subtarget.getFrameLowering()->getStackAlign());

  auto Impl = [&]() {
    return foldMemoryOperandImpl(MF, MI, Ops[0],
                                 MachineOperand::CreateFI(FrameIndex),
                                 InsertPt, Size, Alignment,
                                 /*AllowCommute=*/true);
  };
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    unsigned RCSize = 0;
    unsigned Opc = MI.getOpcode();
    switch (Opc) {
    default:
      return nullptr;
    case X86::TEST8rr:
      NewOpc = X86::CMP8ri;
      RCSize = 1;
      break;
    case X86::TEST16rr:
      NewOpc = X86::CMP16ri;
      RCSize = 2;
      break;
    case X86::TEST32rr:
      NewOpc = X86::CMP32ri;
      RCSize = 4;
      break;
    case X86::TEST64rr:
      NewOpc = X86::CMP64ri32;
      RCSize = 8;
      break;
    }
    // Check if it's safe to fold the load. If the size of the object is
    // narrower than the load width, then it's not.
    if (Size < RCSize)
      return nullptr;
    // Change to CMPXXri r, 0 first.
    MI.setDesc(get(NewOpc));
    MI.getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return nullptr;

  return Impl();
}
  unsigned Opc = LoadMI.getOpcode();
  unsigned UserOpc = MI.getOpcode();
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = getRegClass(MI.getDesc(), Ops[0], &TRI, MF);
  unsigned RegSize = TRI.getRegSizeInBits(*RC);

  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
       Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
       Opc == X86::VMOVSSZrm_alt) &&
      RegSize > 32) {
    // These instructions only load 32 bits; we can't fold them if the
    // destination register is wider than 32 bits (4 bytes) and its user
    // instruction isn't scalar (SS).
    switch (UserOpc) {
    case X86::CVTSS2SDrr_Int:
    case X86::VCVTSS2SDrr_Int:
    case X86::VCVTSS2SDZrr_Int:
    case X86::VCVTSS2SDZrr_Intk:
    case X86::VCVTSS2SDZrr_Intkz:
    case X86::CVTSS2SIrr_Int:
    case X86::CVTSS2SI64rr_Int:
    case X86::VCVTSS2SIrr_Int:
    case X86::VCVTSS2SI64rr_Int:
    case X86::VCVTSS2SIZrr_Int:
    case X86::VCVTSS2SI64Zrr_Int:
    case X86::CVTTSS2SIrr_Int:
    case X86::CVTTSS2SI64rr_Int:
    case X86::VCVTTSS2SIrr_Int:
    case X86::VCVTTSS2SI64rr_Int:
    case X86::VCVTTSS2SIZrr_Int:
    case X86::VCVTTSS2SI64Zrr_Int:
    case X86::VCVTSS2USIZrr_Int:
    case X86::VCVTSS2USI64Zrr_Int:
    case X86::VCVTTSS2USIZrr_Int:
    case X86::VCVTTSS2USI64Zrr_Int:
    case X86::RCPSSr_Int:
    case X86::VRCPSSr_Int:
    case X86::RSQRTSSr_Int:
    case X86::VRSQRTSSr_Int:
    case X86::ROUNDSSri_Int:
    case X86::VROUNDSSri_Int:
    case X86::COMISSrr_Int:
    case X86::VCOMISSrr_Int:
    case X86::VCOMISSZrr_Int:
    case X86::UCOMISSrr_Int:
    case X86::VUCOMISSrr_Int:
    case X86::VUCOMISSZrr_Int:
    case X86::ADDSSrr_Int:
    case X86::VADDSSrr_Int:
    case X86::VADDSSZrr_Int:
    case X86::CMPSSrri_Int:
    case X86::VCMPSSrri_Int:
    case X86::VCMPSSZrri_Int:
    case X86::DIVSSrr_Int:
    case X86::VDIVSSrr_Int:
    case X86::VDIVSSZrr_Int:
    case X86::MAXSSrr_Int:
    case X86::VMAXSSrr_Int:
    case X86::VMAXSSZrr_Int:
    case X86::MINSSrr_Int:
    case X86::VMINSSrr_Int:
    case X86::VMINSSZrr_Int:
    case X86::MULSSrr_Int:
    case X86::VMULSSrr_Int:
    case X86::VMULSSZrr_Int:
    case X86::SQRTSSr_Int:
    case X86::VSQRTSSr_Int:
    case X86::VSQRTSSZr_Int:
    case X86::SUBSSrr_Int:
    case X86::VSUBSSrr_Int:
    case X86::VSUBSSZrr_Int:
    case X86::VADDSSZrr_Intk:
    case X86::VADDSSZrr_Intkz:
    case X86::VCMPSSZrri_Intk:
    case X86::VDIVSSZrr_Intk:
    case X86::VDIVSSZrr_Intkz:
    case X86::VMAXSSZrr_Intk:
    case X86::VMAXSSZrr_Intkz:
    case X86::VMINSSZrr_Intk:
    case X86::VMINSSZrr_Intkz:
    case X86::VMULSSZrr_Intk:
    case X86::VMULSSZrr_Intkz:
    case X86::VSQRTSSZr_Intk:
    case X86::VSQRTSSZr_Intkz:
    case X86::VSUBSSZrr_Intk:
    case X86::VSUBSSZrr_Intkz:
    case X86::VFMADDSS4rr_Int:
    case X86::VFNMADDSS4rr_Int:
    case X86::VFMSUBSS4rr_Int:
    case X86::VFNMSUBSS4rr_Int:
    case X86::VFMADD132SSr_Int:
    case X86::VFNMADD132SSr_Int:
    case X86::VFMADD213SSr_Int:
    case X86::VFNMADD213SSr_Int:
    case X86::VFMADD231SSr_Int:
    case X86::VFNMADD231SSr_Int:
    case X86::VFMSUB132SSr_Int:
    case X86::VFNMSUB132SSr_Int:
    case X86::VFMSUB213SSr_Int:
    case X86::VFNMSUB213SSr_Int:
    case X86::VFMSUB231SSr_Int:
    case X86::VFNMSUB231SSr_Int:
    case X86::VFMADD132SSZr_Int:
    case X86::VFNMADD132SSZr_Int:
    case X86::VFMADD213SSZr_Int:
    case X86::VFNMADD213SSZr_Int:
    case X86::VFMADD231SSZr_Int:
    case X86::VFNMADD231SSZr_Int:
    case X86::VFMSUB132SSZr_Int:
    case X86::VFNMSUB132SSZr_Int:
    case X86::VFMSUB213SSZr_Int:
    case X86::VFNMSUB213SSZr_Int:
    case X86::VFMSUB231SSZr_Int:
    case X86::VFNMSUB231SSZr_Int:
    case X86::VFMADD132SSZr_Intk:
    case X86::VFNMADD132SSZr_Intk:
    case X86::VFMADD213SSZr_Intk:
    case X86::VFNMADD213SSZr_Intk:
    case X86::VFMADD231SSZr_Intk:
    case X86::VFNMADD231SSZr_Intk:
    case X86::VFMSUB132SSZr_Intk:
    case X86::VFNMSUB132SSZr_Intk:
    case X86::VFMSUB213SSZr_Intk:
    case X86::VFNMSUB213SSZr_Intk:
    case X86::VFMSUB231SSZr_Intk:
    case X86::VFNMSUB231SSZr_Intk:
    case X86::VFMADD132SSZr_Intkz:
    case X86::VFNMADD132SSZr_Intkz:
    case X86::VFMADD213SSZr_Intkz:
    case X86::VFNMADD213SSZr_Intkz:
    case X86::VFMADD231SSZr_Intkz:
    case X86::VFNMADD231SSZr_Intkz:
    case X86::VFMSUB132SSZr_Intkz:
    case X86::VFNMSUB132SSZr_Intkz:
    case X86::VFMSUB213SSZr_Intkz:
    case X86::VFNMSUB213SSZr_Intkz:
    case X86::VFMSUB231SSZr_Intkz:
    case X86::VFNMSUB231SSZr_Intkz:
    case X86::VFIXUPIMMSSZrri:
    case X86::VFIXUPIMMSSZrrik:
    case X86::VFIXUPIMMSSZrrikz:
    case X86::VFPCLASSSSZrr:
    case X86::VFPCLASSSSZrrk:
    case X86::VGETEXPSSZr:
    case X86::VGETEXPSSZrk:
    case X86::VGETEXPSSZrkz:
    case X86::VGETMANTSSZrri:
    case X86::VGETMANTSSZrrik:
    case X86::VGETMANTSSZrrikz:
    case X86::VRANGESSZrri:
    case X86::VRANGESSZrrik:
    case X86::VRANGESSZrrikz:
    case X86::VRCP14SSZrr:
    case X86::VRCP14SSZrrk:
    case X86::VRCP14SSZrrkz:
    case X86::VRCP28SSZr:
    case X86::VRCP28SSZrk:
    case X86::VRCP28SSZrkz:
    case X86::VREDUCESSZrri:
    case X86::VREDUCESSZrrik:
    case X86::VREDUCESSZrrikz:
    case X86::VRNDSCALESSZr_Int:
    case X86::VRNDSCALESSZr_Intk:
    case X86::VRNDSCALESSZr_Intkz:
    case X86::VRSQRT14SSZrr:
    case X86::VRSQRT14SSZrrk:
    case X86::VRSQRT14SSZrrkz:
    case X86::VRSQRT28SSZr:
    case X86::VRSQRT28SSZrk:
    case X86::VRSQRT28SSZrkz:
    case X86::VSCALEFSSZrr:
    case X86::VSCALEFSSZrrk:
    case X86::VSCALEFSSZrrkz:
      // These users read the full 128 bits, so the fold is safe.
      break;
    default:
      return nullptr;
    }
  }
  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
       Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
       Opc == X86::VMOVSDZrm_alt) &&
      RegSize > 64) {
    // These instructions only load 64 bits; we can't fold them if the
    // destination register is wider than 64 bits (8 bytes) and its user
    // instruction isn't scalar (SD).
    switch (UserOpc) {
    case X86::CVTSD2SSrr_Int:
    case X86::VCVTSD2SSrr_Int:
    case X86::VCVTSD2SSZrr_Int:
    case X86::VCVTSD2SSZrr_Intk:
    case X86::VCVTSD2SSZrr_Intkz:
    case X86::CVTSD2SIrr_Int:
    case X86::CVTSD2SI64rr_Int:
    case X86::VCVTSD2SIrr_Int:
    case X86::VCVTSD2SI64rr_Int:
    case X86::VCVTSD2SIZrr_Int:
    case X86::VCVTSD2SI64Zrr_Int:
    case X86::CVTTSD2SIrr_Int:
    case X86::CVTTSD2SI64rr_Int:
    case X86::VCVTTSD2SIrr_Int:
    case X86::VCVTTSD2SI64rr_Int:
    case X86::VCVTTSD2SIZrr_Int:
    case X86::VCVTTSD2SI64Zrr_Int:
    case X86::VCVTSD2USIZrr_Int:
    case X86::VCVTSD2USI64Zrr_Int:
    case X86::VCVTTSD2USIZrr_Int:
    case X86::VCVTTSD2USI64Zrr_Int:
    case X86::ROUNDSDri_Int:
    case X86::VROUNDSDri_Int:
    case X86::COMISDrr_Int:
    case X86::VCOMISDrr_Int:
    case X86::VCOMISDZrr_Int:
    case X86::UCOMISDrr_Int:
    case X86::VUCOMISDrr_Int:
    case X86::VUCOMISDZrr_Int:
    case X86::ADDSDrr_Int:
    case X86::VADDSDrr_Int:
    case X86::VADDSDZrr_Int:
    case X86::CMPSDrri_Int:
    case X86::VCMPSDrri_Int:
    case X86::VCMPSDZrri_Int:
    case X86::DIVSDrr_Int:
    case X86::VDIVSDrr_Int:
    case X86::VDIVSDZrr_Int:
    case X86::MAXSDrr_Int:
    case X86::VMAXSDrr_Int:
    case X86::VMAXSDZrr_Int:
    case X86::MINSDrr_Int:
    case X86::VMINSDrr_Int:
    case X86::VMINSDZrr_Int:
    case X86::MULSDrr_Int:
    case X86::VMULSDrr_Int:
    case X86::VMULSDZrr_Int:
    case X86::SQRTSDr_Int:
    case X86::VSQRTSDr_Int:
    case X86::VSQRTSDZr_Int:
    case X86::SUBSDrr_Int:
    case X86::VSUBSDrr_Int:
    case X86::VSUBSDZrr_Int:
    case X86::VADDSDZrr_Intk:
    case X86::VADDSDZrr_Intkz:
    case X86::VCMPSDZrri_Intk:
    case X86::VDIVSDZrr_Intk:
    case X86::VDIVSDZrr_Intkz:
    case X86::VMAXSDZrr_Intk:
    case X86::VMAXSDZrr_Intkz:
    case X86::VMINSDZrr_Intk:
    case X86::VMINSDZrr_Intkz:
    case X86::VMULSDZrr_Intk:
    case X86::VMULSDZrr_Intkz:
    case X86::VSQRTSDZr_Intk:
    case X86::VSQRTSDZr_Intkz:
    case X86::VSUBSDZrr_Intk:
    case X86::VSUBSDZrr_Intkz:
    case X86::VFMADDSD4rr_Int:
    case X86::VFNMADDSD4rr_Int:
    case X86::VFMSUBSD4rr_Int:
    case X86::VFNMSUBSD4rr_Int:
    case X86::VFMADD132SDr_Int:
    case X86::VFNMADD132SDr_Int:
    case X86::VFMADD213SDr_Int:
    case X86::VFNMADD213SDr_Int:
    case X86::VFMADD231SDr_Int:
    case X86::VFNMADD231SDr_Int:
    case X86::VFMSUB132SDr_Int:
    case X86::VFNMSUB132SDr_Int:
    case X86::VFMSUB213SDr_Int:
    case X86::VFNMSUB213SDr_Int:
    case X86::VFMSUB231SDr_Int:
    case X86::VFNMSUB231SDr_Int:
    case X86::VFMADD132SDZr_Int:
    case X86::VFNMADD132SDZr_Int:
    case X86::VFMADD213SDZr_Int:
    case X86::VFNMADD213SDZr_Int:
    case X86::VFMADD231SDZr_Int:
    case X86::VFNMADD231SDZr_Int:
    case X86::VFMSUB132SDZr_Int:
    case X86::VFNMSUB132SDZr_Int:
    case X86::VFMSUB213SDZr_Int:
    case X86::VFNMSUB213SDZr_Int:
    case X86::VFMSUB231SDZr_Int:
    case X86::VFNMSUB231SDZr_Int:
    case X86::VFMADD132SDZr_Intk:
    case X86::VFNMADD132SDZr_Intk:
    case X86::VFMADD213SDZr_Intk:
    case X86::VFNMADD213SDZr_Intk:
    case X86::VFMADD231SDZr_Intk:
    case X86::VFNMADD231SDZr_Intk:
    case X86::VFMSUB132SDZr_Intk:
    case X86::VFNMSUB132SDZr_Intk:
    case X86::VFMSUB213SDZr_Intk:
    case X86::VFNMSUB213SDZr_Intk:
    case X86::VFMSUB231SDZr_Intk:
    case X86::VFNMSUB231SDZr_Intk:
    case X86::VFMADD132SDZr_Intkz:
    case X86::VFNMADD132SDZr_Intkz:
    case X86::VFMADD213SDZr_Intkz:
    case X86::VFNMADD213SDZr_Intkz:
    case X86::VFMADD231SDZr_Intkz:
    case X86::VFNMADD231SDZr_Intkz:
    case X86::VFMSUB132SDZr_Intkz:
    case X86::VFNMSUB132SDZr_Intkz:
    case X86::VFMSUB213SDZr_Intkz:
    case X86::VFNMSUB213SDZr_Intkz:
    case X86::VFMSUB231SDZr_Intkz:
    case X86::VFNMSUB231SDZr_Intkz:
    case X86::VFIXUPIMMSDZrri:
    case X86::VFIXUPIMMSDZrrik:
    case X86::VFIXUPIMMSDZrrikz:
    case X86::VFPCLASSSDZrr:
    case X86::VFPCLASSSDZrrk:
    case X86::VGETEXPSDZr:
    case X86::VGETEXPSDZrk:
    case X86::VGETEXPSDZrkz:
    case X86::VGETMANTSDZrri:
    case X86::VGETMANTSDZrrik:
    case X86::VGETMANTSDZrrikz:
    case X86::VRANGESDZrri:
    case X86::VRANGESDZrrik:
    case X86::VRANGESDZrrikz:
    case X86::VRCP14SDZrr:
    case X86::VRCP14SDZrrk:
    case X86::VRCP14SDZrrkz:
    case X86::VRCP28SDZr:
    case X86::VRCP28SDZrk:
    case X86::VRCP28SDZrkz:
    case X86::VREDUCESDZrri:
    case X86::VREDUCESDZrrik:
    case X86::VREDUCESDZrrikz:
    case X86::VRNDSCALESDZr_Int:
    case X86::VRNDSCALESDZr_Intk:
    case X86::VRNDSCALESDZr_Intkz:
    case X86::VRSQRT14SDZrr:
    case X86::VRSQRT14SDZrrk:
    case X86::VRSQRT14SDZrrkz:
    case X86::VRSQRT28SDZr:
    case X86::VRSQRT28SDZrk:
    case X86::VRSQRT28SDZrkz:
    case X86::VSCALEFSDZrr:
    case X86::VSCALEFSDZrrk:
    case X86::VSCALEFSDZrrkz:
      // These users read the full 128 bits, so the fold is safe.
      break;
    default:
      return nullptr;
    }
  }
  if ((Opc == X86::VMOVSHZrm || Opc == X86::VMOVSHZrm_alt) && RegSize > 16) {
    // These instructions only load 16 bits; we can't fold them if the
    // destination register is wider than 16 bits (2 bytes) and its user
    // instruction isn't scalar (SH).
    switch (UserOpc) {
    case X86::VADDSHZrr_Int:
    case X86::VCMPSHZrri_Int:
    case X86::VDIVSHZrr_Int:
    case X86::VMAXSHZrr_Int:
    case X86::VMINSHZrr_Int:
    case X86::VMULSHZrr_Int:
    case X86::VSUBSHZrr_Int:
    case X86::VADDSHZrr_Intk:
    case X86::VADDSHZrr_Intkz:
    case X86::VCMPSHZrri_Intk:
    case X86::VDIVSHZrr_Intk:
    case X86::VDIVSHZrr_Intkz:
    case X86::VMAXSHZrr_Intk:
    case X86::VMAXSHZrr_Intkz:
    case X86::VMINSHZrr_Intk:
    case X86::VMINSHZrr_Intkz:
    case X86::VMULSHZrr_Intk:
    case X86::VMULSHZrr_Intkz:
    case X86::VSUBSHZrr_Intk:
    case X86::VSUBSHZrr_Intkz:
    case X86::VFMADD132SHZr_Int:
    case X86::VFNMADD132SHZr_Int:
    case X86::VFMADD213SHZr_Int:
    case X86::VFNMADD213SHZr_Int:
    case X86::VFMADD231SHZr_Int:
    case X86::VFNMADD231SHZr_Int:
    case X86::VFMSUB132SHZr_Int:
    case X86::VFNMSUB132SHZr_Int:
    case X86::VFMSUB213SHZr_Int:
    case X86::VFNMSUB213SHZr_Int:
    case X86::VFMSUB231SHZr_Int:
    case X86::VFNMSUB231SHZr_Int:
    case X86::VFMADD132SHZr_Intk:
    case X86::VFNMADD132SHZr_Intk:
    case X86::VFMADD213SHZr_Intk:
    case X86::VFNMADD213SHZr_Intk:
    case X86::VFMADD231SHZr_Intk:
    case X86::VFNMADD231SHZr_Intk:
    case X86::VFMSUB132SHZr_Intk:
    case X86::VFNMSUB132SHZr_Intk:
    case X86::VFMSUB213SHZr_Intk:
    case X86::VFNMSUB213SHZr_Intk:
    case X86::VFMSUB231SHZr_Intk:
    case X86::VFNMSUB231SHZr_Intk:
    case X86::VFMADD132SHZr_Intkz:
    case X86::VFNMADD132SHZr_Intkz:
    case X86::VFMADD213SHZr_Intkz:
    case X86::VFNMADD213SHZr_Intkz:
    case X86::VFMADD231SHZr_Intkz:
    case X86::VFNMADD231SHZr_Intkz:
    case X86::VFMSUB132SHZr_Intkz:
    case X86::VFNMSUB132SHZr_Intkz:
    case X86::VFMSUB213SHZr_Intkz:
    case X86::VFNMSUB213SHZr_Intkz:
    case X86::VFMSUB231SHZr_Intkz:
    case X86::VFNMSUB231SHZr_Intkz:
      // These users read the full 128 bits, so the fold is safe.
      break;
    default:
      return nullptr;
    }
  }
  // Avoid folds of operands that carry sub-register indices.
  for (auto Op : Ops) {
    if (MI.getOperand(Op).getSubReg())
      return nullptr;
  }

  // Determine the alignment of the load.
  Align Alignment;
  unsigned LoadOpc = LoadMI.getOpcode();
  if (LoadMI.hasOneMemOperand())
    Alignment = (*LoadMI.memoperands_begin())->getAlign();
  else
    switch (LoadOpc) {
    case X86::AVX512_512_SET0:
    case X86::AVX512_512_SETALLONES:
      Alignment = Align(64);
      break;
    case X86::AVX2_SETALLONES:
    case X86::AVX1_SETALLONES:
    case X86::AVX_SET0:
    case X86::AVX512_256_SET0:
      Alignment = Align(32);
      break;
    case X86::V_SET0:
    case X86::V_SETALLONES:
    case X86::AVX512_128_SET0:
    case X86::FsFLD0F128:
    case X86::AVX512_FsFLD0F128:
      Alignment = Align(16);
      break;
    case X86::FsFLD0SD:
    case X86::AVX512_FsFLD0SD:
      Alignment = Align(8);
      break;
    case X86::FsFLD0SS:
    case X86::AVX512_FsFLD0SS:
      Alignment = Align(4);
      break;
    case X86::AVX512_FsFLD0SH:
      Alignment = Align(2);
      break;
    default:
      return nullptr;
    }

  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    switch (MI.getOpcode()) {
    default:
      return nullptr;
    case X86::TEST8rr:
      NewOpc = X86::CMP8ri;
      break;
    case X86::TEST16rr:
      NewOpc = X86::CMP16ri;
      break;
    case X86::TEST32rr:
      NewOpc = X86::CMP32ri;
      break;
    case X86::TEST64rr:
      NewOpc = X86::CMP64ri32;
      break;
    }
    // Change to CMPXXri r, 0 first.
    MI.setDesc(get(NewOpc));
    MI.getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return nullptr;
  SmallVector<MachineOperand, X86::AddrNumOperands> MOs;
  switch (LoadOpc) {
  case X86::V_SET0:
  case X86::V_SETALLONES:
  case X86::AVX2_SETALLONES:
  case X86::AVX1_SETALLONES:
  case X86::AVX_SET0:
  case X86::AVX512_128_SET0:
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0:
  case X86::AVX512_512_SETALLONES:
  case X86::AVX512_FsFLD0SH:
  case X86::FsFLD0SD:
  case X86::AVX512_FsFLD0SD:
  case X86::FsFLD0SS:
  case X86::AVX512_FsFLD0SS:
  case X86::FsFLD0F128:
  case X86::AVX512_FsFLD0F128: {
    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
    // Create a constant-pool entry and operands to load from it.

    // Large code model can't fold loads this way.
    if (MF.getTarget().getCodeModel() == CodeModel::Large)
      return nullptr;

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    // Since we're using Small or Kernel code model, we can always use
    // RIP-relative addressing for a smaller encoding.
    if (Subtarget.is64Bit()) {
      PICBase = X86::RIP;
    } else if (MF.getTarget().isPositionIndependent()) {
      // FIXME: PICBase = getGlobalBaseReg(&MF) doesn't work here: the base
      // register may have been spilled or may not be live at MI.
      return nullptr;
    }

    // Create a constant-pool entry.
    Type *Ty;
    bool IsAllOnes = false;
    switch (LoadOpc) {
    case X86::FsFLD0SS:
    case X86::AVX512_FsFLD0SS:
      Ty = Type::getFloatTy(MF.getFunction().getContext());
      break;
    case X86::FsFLD0SD:
    case X86::AVX512_FsFLD0SD:
      Ty = Type::getDoubleTy(MF.getFunction().getContext());
      break;
    case X86::FsFLD0F128:
    case X86::AVX512_FsFLD0F128:
      Ty = Type::getFP128Ty(MF.getFunction().getContext());
      break;
    case X86::AVX512_FsFLD0SH:
      Ty = Type::getHalfTy(MF.getFunction().getContext());
      break;
    case X86::AVX512_512_SETALLONES:
      IsAllOnes = true;
      [[fallthrough]];
    case X86::AVX512_512_SET0:
      Ty = FixedVectorType::get(
          Type::getInt32Ty(MF.getFunction().getContext()), 16);
      break;
    case X86::AVX1_SETALLONES:
    case X86::AVX2_SETALLONES:
      IsAllOnes = true;
      [[fallthrough]];
    case X86::AVX_SET0:
    case X86::AVX512_256_SET0:
      Ty = FixedVectorType::get(
          Type::getInt32Ty(MF.getFunction().getContext()), 8);
      break;
    case X86::V_SETALLONES:
      IsAllOnes = true;
      [[fallthrough]];
    case X86::V_SET0:
    case X86::AVX512_128_SET0:
      Ty = FixedVectorType::get(
          Type::getInt32Ty(MF.getFunction().getContext()), 4);
      break;
    }
8246 case X86::VPBROADCASTBZ128rm:
8247 case X86::VPBROADCASTBZ256rm:
8248 case X86::VPBROADCASTBZrm:
8249 case X86::VBROADCASTF32X2Z256rm:
8250 case X86::VBROADCASTF32X2Zrm:
8251 case X86::VBROADCASTI32X2Z128rm:
8252 case X86::VBROADCASTI32X2Z256rm:
8253 case X86::VBROADCASTI32X2Zrm:
8257#define FOLD_BROADCAST(SIZE) \
8258 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands, \
8259 LoadMI.operands_begin() + NumOps); \
8260 return foldMemoryBroadcast(MF, MI, Ops[0], MOs, InsertPt, SIZE, \
8262 case X86::VPBROADCASTWZ128rm:
8263 case X86::VPBROADCASTWZ256rm:
8264 case X86::VPBROADCASTWZrm:
8266 case X86::VPBROADCASTDZ128rm:
8267 case X86::VPBROADCASTDZ256rm:
8268 case X86::VPBROADCASTDZrm:
8269 case X86::VBROADCASTSSZ128rm:
8270 case X86::VBROADCASTSSZ256rm:
8271 case X86::VBROADCASTSSZrm:
8273 case X86::VPBROADCASTQZ128rm:
8274 case X86::VPBROADCASTQZ256rm:
8275 case X86::VPBROADCASTQZrm:
8276 case X86::VBROADCASTSDZ256rm:
8277 case X86::VBROADCASTSDZrm:
8290 0, Alignment, true);
8297 unsigned BitsSize, bool AllowCommute) const {
8301 ? fuseInst(MF, I->DstOp, OpNum, MOs, InsertPt, MI, *this)
8307 unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
8308 if (CommuteOpIdx2 == OpNum) {
8313 foldMemoryBroadcast(MF, MI, CommuteOpIdx2, MOs, InsertPt, BitsSize,
8318 commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
8333 if (!MMO->isStore()) {
8351 if (!MMO->isStore())
8354 if (!MMO->isLoad()) {
8372 assert((SpillSize == 64 || STI.hasVLX()) &&
8373 "Can't broadcast less than 64 bytes without AVX512VL!");
8375#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64) \
8377 switch (SpillSize) { \
8379 llvm_unreachable("Unknown spill size"); \
8413 unsigned Opc = I->DstOp;
8417 if (UnfoldLoad && !FoldedLoad)
8419 UnfoldLoad &= FoldedLoad;
8420 if (UnfoldStore && !FoldedStore)
8422 UnfoldStore &= FoldedStore;
8429 if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
8430 Subtarget.isUnalignedMem16Slow())
8439 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
8443 else if (Op.isReg() && Op.isImplicit())
8459 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8460 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8504 case X86::CMP64ri32:
8515 case X86::CMP64ri32:
8516 NewOpc = X86::TEST64rr;
8519 NewOpc = X86::TEST32rr;
8522 NewOpc = X86::TEST16rr;
8525 NewOpc = X86::TEST8rr;
8539 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
8540 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8556 if (!N->isMachineOpcode())
8562 unsigned Opc = I->DstOp;
8570 unsigned NumDefs = MCID.NumDefs;
8571 std::vector<SDValue> AddrOps;
8572 std::vector<SDValue> BeforeOps;
8573 std::vector<SDValue> AfterOps;
8575 unsigned NumOps = N->getNumOperands();
8576 for (unsigned i = 0; i != NumOps - 1; ++i) {
8579 AddrOps.push_back(Op);
8580 else if (i < Index - NumDefs)
8581 BeforeOps.push_back(Op);
8582 else if (i > Index - NumDefs)
8583 AfterOps.push_back(Op);
8585 SDValue Chain = N->getOperand(NumOps - 1);
8586 AddrOps.push_back(Chain);
8591 EVT VT = *TRI.legalclasstypes_begin(*RC);
8593 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8594 Subtarget.isUnalignedMem16Slow())
8604 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8605 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8617 std::vector<EVT> VTs;
8621 VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
8623 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
8624 EVT VT = N->getValueType(i);
8625 if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
8629 BeforeOps.push_back(SDValue(Load, 0));
8635 case X86::CMP64ri32:
8643 case X86::CMP64ri32:
8644 Opc = X86::TEST64rr;
8647 Opc = X86::TEST32rr;
8650 Opc = X86::TEST16rr;
8656 BeforeOps[1] = BeforeOps[0];
8665 AddrOps.push_back(SDValue(NewNode, 0));
8666 AddrOps.push_back(Chain);
8668 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8669 Subtarget.isUnalignedMem16Slow())
8674 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8675 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8678 dl, MVT::Other, AddrOps);
8691 unsigned *LoadRegIndex) const {
8697 if (UnfoldLoad && !FoldedLoad)
8699 if (UnfoldStore && !FoldedStore)
8708 int64_t &Offset2) const {
8712 auto IsLoadOpcode = [&](unsigned Opcode) {
8724 case X86::MOVSSrm_alt:
8726 case X86::MOVSDrm_alt:
8727 case X86::MMX_MOVD64rm:
8728 case X86::MMX_MOVQ64rm:
8737 case X86::VMOVSSrm_alt:
8739 case X86::VMOVSDrm_alt:
8740 case X86::VMOVAPSrm:
8741 case X86::VMOVUPSrm:
8742 case X86::VMOVAPDrm:
8743 case X86::VMOVUPDrm:
8744 case X86::VMOVDQArm:
8745 case X86::VMOVDQUrm:
8746 case X86::VMOVAPSYrm:
8747 case X86::VMOVUPSYrm:
8748 case X86::VMOVAPDYrm:
8749 case X86::VMOVUPDYrm:
8750 case X86::VMOVDQAYrm:
8751 case X86::VMOVDQUYrm:
8753 case X86::VMOVSSZrm:
8754 case X86::VMOVSSZrm_alt:
8755 case X86::VMOVSDZrm:
8756 case X86::VMOVSDZrm_alt:
8757 case X86::VMOVAPSZ128rm:
8758 case X86::VMOVUPSZ128rm:
8759 case X86::VMOVAPSZ128rm_NOVLX:
8760 case X86::VMOVUPSZ128rm_NOVLX:
8761 case X86::VMOVAPDZ128rm:
8762 case X86::VMOVUPDZ128rm:
8763 case X86::VMOVDQU8Z128rm:
8764 case X86::VMOVDQU16Z128rm:
8765 case X86::VMOVDQA32Z128rm:
8766 case X86::VMOVDQU32Z128rm:
8767 case X86::VMOVDQA64Z128rm:
8768 case X86::VMOVDQU64Z128rm:
8769 case X86::VMOVAPSZ256rm:
8770 case X86::VMOVUPSZ256rm:
8771 case X86::VMOVAPSZ256rm_NOVLX:
8772 case X86::VMOVUPSZ256rm_NOVLX:
8773 case X86::VMOVAPDZ256rm:
8774 case X86::VMOVUPDZ256rm:
8775 case X86::VMOVDQU8Z256rm:
8776 case X86::VMOVDQU16Z256rm:
8777 case X86::VMOVDQA32Z256rm:
8778 case X86::VMOVDQU32Z256rm:
8779 case X86::VMOVDQA64Z256rm:
8780 case X86::VMOVDQU64Z256rm:
8781 case X86::VMOVAPSZrm:
8782 case X86::VMOVUPSZrm:
8783 case X86::VMOVAPDZrm:
8784 case X86::VMOVUPDZrm:
8785 case X86::VMOVDQU8Zrm:
8786 case X86::VMOVDQU16Zrm:
8787 case X86::VMOVDQA32Zrm:
8788 case X86::VMOVDQU32Zrm:
8789 case X86::VMOVDQA64Zrm:
8790 case X86::VMOVDQU64Zrm:
8792 case X86::KMOVBkm_EVEX:
8794 case X86::KMOVWkm_EVEX:
8796 case X86::KMOVDkm_EVEX:
8798 case X86::KMOVQkm_EVEX:
8808 auto HasSameOp = [&](int I) {
8824 if (!Disp1 || !Disp2)
8827 Offset1 = Disp1->getSExtValue();
8828 Offset2 = Disp2->getSExtValue();
8833 int64_t Offset1, int64_t Offset2,
8834 unsigned NumLoads) const {
8835 assert(Offset2 > Offset1);
8836 if ((Offset2 - Offset1) / 8 > 64)
8850 case X86::MMX_MOVD64rm:
8851 case X86::MMX_MOVQ64rm:
8860 if (Subtarget.is64Bit()) {
8863 } else if (NumLoads) {
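// Editorial note: the (Offset2 - Offset1) / 8 > 64 test above rejects pairs
// of loads whose displacements are more than 64 * 8 = 512 bytes apart; only
// loads at least that close together are considered worth clustering near
// each other by the scheduler.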
8886 unsigned Opcode = MI.getOpcode();
8887 if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
8888 Opcode == X86::PLDTILECFGV)
8901 assert(Cond.size() == 1 && "Invalid X86 branch condition!");
8911 return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
8912 RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
8913 RC == &X86::RFP80RegClass);
8925 if (GlobalBaseReg != 0)
8926 return GlobalBaseReg;
8932 Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass);
8934 return GlobalBaseReg;
8942 for (const uint16_t (&Row)[3] : Table)
8943 if (Row[domain - 1] == opcode)
8951 for (const uint16_t (&Row)[4] : Table)
8952 if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode))
8959 unsigned NewWidth, unsigned *pNewMask = nullptr) {
8960 assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
8961 "Illegal blend mask scale");
8962 unsigned NewMask = 0;
8964 if ((OldWidth % NewWidth) == 0) {
8965 unsigned Scale = OldWidth / NewWidth;
8966 unsigned SubMask = (1u << Scale) - 1;
8967 for (unsigned i = 0; i != NewWidth; ++i) {
8968 unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
8970 NewMask |= (1u << i);
8971 else if (Sub != 0x0)
8975 unsigned Scale = NewWidth / OldWidth;
8976 unsigned SubMask = (1u << Scale) - 1;
8977 for (unsigned i = 0; i != OldWidth; ++i) {
8978 if (OldMask & (1 << i)) {
8979 NewMask |= (SubMask << (i * Scale));
8985 *pNewMask = NewMask;
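// Worked example (editorial): AdjustBlendMask scales a blend immediate between
// element widths. Widening 0b0101 from width 4 to width 8 (Scale = 2)
// replicates each set bit across two positions, giving 0b00110011; narrowing
// 0b00110011 back to width 4 succeeds because every 2-bit group is uniformly
// all-zero or all-one, while 0b01110011 fails (the group 0b01 is mixed), so no
// narrower equivalent mask exists.
//   unsigned NewMask;
//   bool OK = AdjustBlendMask(0b0101, 4, 8, &NewMask); // OK, NewMask == 0x33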
8990 unsigned Opcode = MI.getOpcode();
8991 unsigned NumOperands = MI.getDesc().getNumOperands();
8993 auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
8995 if (MI.getOperand(NumOperands - 1).isImm()) {
8996 unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
8998 validDomains |= 0x2;
9000 validDomains |= 0x4;
9001 if (!Is256 || Subtarget.hasAVX2())
9002 validDomains |= 0x8;
9002 validDomains |= 0x8;
9004 return validDomains;
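// Editorial note: bit k of the returned mask marks execution domain k as
// reachable (0x2 = PackedSingle, 0x4 = PackedDouble, 0x8 = PackedInt). A
// blend can switch domain only if its immediate rescales cleanly to the new
// element width, and 256-bit integer blends additionally require AVX2, hence
// the hasAVX2() guard above.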
9008 case X86::BLENDPDrmi:
9009 case X86::BLENDPDrri:
9010 case X86::VBLENDPDrmi:
9011 case X86::VBLENDPDrri:
9012 return GetBlendDomains(2, false);
9013 case X86::VBLENDPDYrmi:
9014 case X86::VBLENDPDYrri:
9015 return GetBlendDomains(4, true);
9016 case X86::BLENDPSrmi:
9017 case X86::BLENDPSrri:
9018 case X86::VBLENDPSrmi:
9019 case X86::VBLENDPSrri:
9020 case X86::VPBLENDDrmi:
9021 case X86::VPBLENDDrri:
9022 return GetBlendDomains(4, false);
9023 case X86::VBLENDPSYrmi:
9024 case X86::VBLENDPSYrri:
9025 case X86::VPBLENDDYrmi:
9026 case X86::VPBLENDDYrri:
9027 return GetBlendDomains(8, true);
9028 case X86::PBLENDWrmi:
9029 case X86::PBLENDWrri:
9030 case X86::VPBLENDWrmi:
9031 case X86::VPBLENDWrri:
9033 case X86::VPBLENDWYrmi:
9034 case X86::VPBLENDWYrri:
9035 return GetBlendDomains(8, false);
9036 case X86::VPANDDZ128rr:
9037 case X86::VPANDDZ128rm:
9038 case X86::VPANDDZ256rr:
9039 case X86::VPANDDZ256rm:
9040 case X86::VPANDQZ128rr:
9041 case X86::VPANDQZ128rm:
9042 case X86::VPANDQZ256rr:
9043 case X86::VPANDQZ256rm:
9044 case X86::VPANDNDZ128rr:
9045 case X86::VPANDNDZ128rm:
9046 case X86::VPANDNDZ256rr:
9047 case X86::VPANDNDZ256rm:
9048 case X86::VPANDNQZ128rr:
9049 case X86::VPANDNQZ128rm:
9050 case X86::VPANDNQZ256rr:
9051 case X86::VPANDNQZ256rm:
9052 case X86::VPORDZ128rr:
9053 case X86::VPORDZ128rm:
9054 case X86::VPORDZ256rr:
9055 case X86::VPORDZ256rm:
9056 case X86::VPORQZ128rr:
9057 case X86::VPORQZ128rm:
9058 case X86::VPORQZ256rr:
9059 case X86::VPORQZ256rm:
9060 case X86::VPXORDZ128rr:
9061 case X86::VPXORDZ128rm:
9062 case X86::VPXORDZ256rr:
9063 case X86::VPXORDZ256rm:
9064 case X86::VPXORQZ128rr:
9065 case X86::VPXORQZ128rm:
9066 case X86::VPXORQZ256rr:
9067 case X86::VPXORQZ256rm:
9070 if (Subtarget.hasDQI())
9073 if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
9075 if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
9078 if (NumOperands == 3 &&
9079 RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
9084 case X86::MOVHLPSrr:
9091 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
9092 MI.getOperand(0).getSubReg() == 0 &&
9093 MI.getOperand(1).getSubReg() == 0 && MI.getOperand(2).getSubReg() == 0)
9096 case X86::SHUFPDrri:
9102#include "X86ReplaceableInstrs.def"
9108 assert(dom && "Not an SSE instruction");
9110 unsigned Opcode = MI.getOpcode();
9111 unsigned NumOperands = MI.getDesc().getNumOperands();
9113 auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
9114 if (MI.getOperand(NumOperands - 1).isImm()) {
9115 unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
9116 Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
9117 unsigned NewImm = Imm;
9119 const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
9121 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
9125 } else if (Domain == 2) {
9127 } else if (Domain == 3) {
9130 if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
9131 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
9135 assert(!Is256 && "128-bit vector expected");
9140 assert(table && table[Domain - 1] && "Unknown domain op");
9142 MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
9148 case X86::BLENDPDrmi:
9149 case X86::BLENDPDrri:
9150 case X86::VBLENDPDrmi:
9151 case X86::VBLENDPDrri:
9152 return SetBlendDomain(2, false);
9153 case X86::VBLENDPDYrmi:
9154 case X86::VBLENDPDYrri:
9155 return SetBlendDomain(4, true);
9156 case X86::BLENDPSrmi:
9157 case X86::BLENDPSrri:
9158 case X86::VBLENDPSrmi:
9159 case X86::VBLENDPSrri:
9160 case X86::VPBLENDDrmi:
9161 case X86::VPBLENDDrri:
9162 return SetBlendDomain(4, false);
9163 case X86::VBLENDPSYrmi:
9164 case X86::VBLENDPSYrri:
9165 case X86::VPBLENDDYrmi:
9166 case X86::VPBLENDDYrri:
9167 return SetBlendDomain(8, true);
9168 case X86::PBLENDWrmi:
9169 case X86::PBLENDWrri:
9170 case X86::VPBLENDWrmi:
9171 case X86::VPBLENDWrri:
9172 return SetBlendDomain(8, false);
9173 case X86::VPBLENDWYrmi:
9174 case X86::VPBLENDWYrri:
9175 return SetBlendDomain(16, true);
9176 case X86::VPANDDZ128rr:
9177 case X86::VPANDDZ128rm:
9178 case X86::VPANDDZ256rr:
9179 case X86::VPANDDZ256rm:
9180 case X86::VPANDQZ128rr:
9181 case X86::VPANDQZ128rm:
9182 case X86::VPANDQZ256rr:
9183 case X86::VPANDQZ256rm:
9184 case X86::VPANDNDZ128rr:
9185 case X86::VPANDNDZ128rm:
9186 case X86::VPANDNDZ256rr:
9187 case X86::VPANDNDZ256rm:
9188 case X86::VPANDNQZ128rr:
9189 case X86::VPANDNQZ128rm:
9190 case X86::VPANDNQZ256rr:
9191 case X86::VPANDNQZ256rm:
9192 case X86::VPORDZ128rr:
9193 case X86::VPORDZ128rm:
9194 case X86::VPORDZ256rr:
9195 case X86::VPORDZ256rm:
9196 case X86::VPORQZ128rr:
9197 case X86::VPORQZ128rm:
9198 case X86::VPORQZ256rr:
9199 case X86::VPORQZ256rm:
9200 case X86::VPXORDZ128rr:
9201 case X86::VPXORDZ128rm:
9202 case X86::VPXORDZ256rr:
9203 case X86::VPXORDZ256rm:
9204 case X86::VPXORQZ128rr:
9205 case X86::VPXORQZ128rm:
9206 case X86::VPXORQZ256rr:
9207 case X86::VPXORQZ256rm: {
9209 if (Subtarget.hasDQI())
9213 lookupAVX512(MI.getOpcode(), dom, ReplaceableCustomAVX512LogicInstrs);
9214 assert(table && "Instruction not found in table?");
9217 if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9222 case X86::UNPCKHPDrr:
9223 case X86::MOVHLPSrr:
9226 MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
9227 MI.getOperand(0).getSubReg() == 0 &&
9228 MI.getOperand(1).getSubReg() == 0 &&
9229 MI.getOperand(2).getSubReg() == 0) {
9230 commuteInstruction(MI, false);
9234 if (Opcode == X86::MOVHLPSrr)
9237 case X86::SHUFPDrri: {
9239 unsigned Imm = MI.getOperand(3).getImm();
9240 unsigned NewImm = 0x44;
9245 MI.getOperand(3).setImm(NewImm);
9246 MI.setDesc(get(X86::SHUFPSrri));
9254std::pair<uint16_t, uint16_t>
9257 unsigned opcode = MI.getOpcode();
9263 return std::make_pair(domain, validDomains);
9265 if (lookup(opcode, domain, ReplaceableInstrs)) {
9267 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
9268 validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
9269 } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
9271 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
9275 return std::make_pair(0, 0);
9277 } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
9279 } else if (Subtarget.hasDQI() &&
9280 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
9282 } else if (Subtarget.hasDQI()) {
9284 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQMasked)) {
9285 if (domain == 1 || (domain == 3 && table[3] == opcode))
9284 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQMasked)) {
9285 if (domain == 1 || (domain == 3 && table[3] == opcode))
9292 return std::make_pair(domain, validDomains);
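// Editorial note: the (domain, validDomains) pair returned above reads as
// "currently executing in `domain`, and rewritable into any domain whose bit
// is set". For example (1, 0xe) describes a PackedSingle instruction that has
// PackedDouble and PackedInt equivalents in the replacement tables.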
9298 assert(dom && "Not an SSE instruction");
9307 "256-bit vector operations only available in AVX2");
9308 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
9311 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
9313 "Can only select PackedSingle or PackedDouble");
9317 "256-bit insert/extract only available in AVX2");
9318 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
9322 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
9324 if (table && Domain == 3 && table[3] == MI.getOpcode())
9328 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9329 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
9332 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9336 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9337 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
9338 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9341 assert(table && "Cannot change domain");
9367 case X86::DIVSDrm_Int:
9369 case X86::DIVSDrr_Int:
9371 case X86::DIVSSrm_Int:
9373 case X86::DIVSSrr_Int:
9379 case X86::SQRTSDm_Int:
9381 case X86::SQRTSDr_Int:
9383 case X86::SQRTSSm_Int:
9385 case X86::SQRTSSr_Int:
9389 case X86::VDIVPDYrm:
9390 case X86::VDIVPDYrr:
9393 case X86::VDIVPSYrm:
9394 case X86::VDIVPSYrr:
9396 case X86::VDIVSDrm_Int:
9398 case X86::VDIVSDrr_Int:
9400 case X86::VDIVSSrm_Int:
9402 case X86::VDIVSSrr_Int:
9405 case X86::VSQRTPDYm:
9406 case X86::VSQRTPDYr:
9409 case X86::VSQRTPSYm:
9410 case X86::VSQRTPSYr:
9412 case X86::VSQRTSDm_Int:
9414 case X86::VSQRTSDr_Int:
9416 case X86::VSQRTSSm_Int:
9418 case X86::VSQRTSSr_Int:
9420 case X86::VDIVPDZ128rm:
9421 case X86::VDIVPDZ128rmb:
9422 case X86::VDIVPDZ128rmbk:
9423 case X86::VDIVPDZ128rmbkz:
9424 case X86::VDIVPDZ128rmk:
9425 case X86::VDIVPDZ128rmkz:
9426 case X86::VDIVPDZ128rr:
9427 case X86::VDIVPDZ128rrk:
9428 case X86::VDIVPDZ128rrkz:
9429 case X86::VDIVPDZ256rm:
9430 case X86::VDIVPDZ256rmb:
9431 case X86::VDIVPDZ256rmbk:
9432 case X86::VDIVPDZ256rmbkz:
9433 case X86::VDIVPDZ256rmk:
9434 case X86::VDIVPDZ256rmkz:
9435 case X86::VDIVPDZ256rr:
9436 case X86::VDIVPDZ256rrk:
9437 case X86::VDIVPDZ256rrkz:
9438 case X86::VDIVPDZrrb:
9439 case X86::VDIVPDZrrbk:
9440 case X86::VDIVPDZrrbkz:
9441 case X86::VDIVPDZrm:
9442 case X86::VDIVPDZrmb:
9443 case X86::VDIVPDZrmbk:
9444 case X86::VDIVPDZrmbkz:
9445 case X86::VDIVPDZrmk:
9446 case X86::VDIVPDZrmkz:
9447 case X86::VDIVPDZrr:
9448 case X86::VDIVPDZrrk:
9449 case X86::VDIVPDZrrkz:
9450 case X86::VDIVPSZ128rm:
9451 case X86::VDIVPSZ128rmb:
9452 case X86::VDIVPSZ128rmbk:
9453 case X86::VDIVPSZ128rmbkz:
9454 case X86::VDIVPSZ128rmk:
9455 case X86::VDIVPSZ128rmkz:
9456 case X86::VDIVPSZ128rr:
9457 case X86::VDIVPSZ128rrk:
9458 case X86::VDIVPSZ128rrkz:
9459 case X86::VDIVPSZ256rm:
9460 case X86::VDIVPSZ256rmb:
9461 case X86::VDIVPSZ256rmbk:
9462 case X86::VDIVPSZ256rmbkz:
9463 case X86::VDIVPSZ256rmk:
9464 case X86::VDIVPSZ256rmkz:
9465 case X86::VDIVPSZ256rr:
9466 case X86::VDIVPSZ256rrk:
9467 case X86::VDIVPSZ256rrkz:
9468 case X86::VDIVPSZrrb:
9469 case X86::VDIVPSZrrbk:
9470 case X86::VDIVPSZrrbkz:
9471 case X86::VDIVPSZrm:
9472 case X86::VDIVPSZrmb:
9473 case X86::VDIVPSZrmbk:
9474 case X86::VDIVPSZrmbkz:
9475 case X86::VDIVPSZrmk:
9476 case X86::VDIVPSZrmkz:
9477 case X86::VDIVPSZrr:
9478 case X86::VDIVPSZrrk:
9479 case X86::VDIVPSZrrkz:
9480 case X86::VDIVSDZrm:
9481 case X86::VDIVSDZrr:
9482 case X86::VDIVSDZrm_Int:
9483 case X86::VDIVSDZrm_Intk:
9484 case X86::VDIVSDZrm_Intkz:
9485 case X86::VDIVSDZrr_Int:
9486 case X86::VDIVSDZrr_Intk:
9487 case X86::VDIVSDZrr_Intkz:
9488 case X86::VDIVSDZrrb_Int:
9489 case X86::VDIVSDZrrb_Intk:
9490 case X86::VDIVSDZrrb_Intkz:
9491 case X86::VDIVSSZrm:
9492 case X86::VDIVSSZrr:
9493 case X86::VDIVSSZrm_Int:
9494 case X86::VDIVSSZrm_Intk:
9495 case X86::VDIVSSZrm_Intkz:
9496 case X86::VDIVSSZrr_Int:
9497 case X86::VDIVSSZrr_Intk:
9498 case X86::VDIVSSZrr_Intkz:
9499 case X86::VDIVSSZrrb_Int:
9500 case X86::VDIVSSZrrb_Intk:
9501 case X86::VDIVSSZrrb_Intkz:
9502 case X86::VSQRTPDZ128m:
9503 case X86::VSQRTPDZ128mb:
9504 case X86::VSQRTPDZ128mbk:
9505 case X86::VSQRTPDZ128mbkz:
9506 case X86::VSQRTPDZ128mk:
9507 case X86::VSQRTPDZ128mkz:
9508 case X86::VSQRTPDZ128r:
9509 case X86::VSQRTPDZ128rk:
9510 case X86::VSQRTPDZ128rkz:
9511 case X86::VSQRTPDZ256m:
9512 case X86::VSQRTPDZ256mb:
9513 case X86::VSQRTPDZ256mbk:
9514 case X86::VSQRTPDZ256mbkz:
9515 case X86::VSQRTPDZ256mk:
9516 case X86::VSQRTPDZ256mkz:
9517 case X86::VSQRTPDZ256r:
9518 case X86::VSQRTPDZ256rk:
9519 case X86::VSQRTPDZ256rkz:
9520 case X86::VSQRTPDZm:
9521 case X86::VSQRTPDZmb:
9522 case X86::VSQRTPDZmbk:
9523 case X86::VSQRTPDZmbkz:
9524 case X86::VSQRTPDZmk:
9525 case X86::VSQRTPDZmkz:
9526 case X86::VSQRTPDZr:
9527 case X86::VSQRTPDZrb:
9528 case X86::VSQRTPDZrbk:
9529 case X86::VSQRTPDZrbkz:
9530 case X86::VSQRTPDZrk:
9531 case X86::VSQRTPDZrkz:
9532 case X86::VSQRTPSZ128m:
9533 case X86::VSQRTPSZ128mb:
9534 case X86::VSQRTPSZ128mbk:
9535 case X86::VSQRTPSZ128mbkz:
9536 case X86::VSQRTPSZ128mk:
9537 case X86::VSQRTPSZ128mkz:
9538 case X86::VSQRTPSZ128r:
9539 case X86::VSQRTPSZ128rk:
9540 case X86::VSQRTPSZ128rkz:
9541 case X86::VSQRTPSZ256m:
9542 case X86::VSQRTPSZ256mb:
9543 case X86::VSQRTPSZ256mbk:
9544 case X86::VSQRTPSZ256mbkz:
9545 case X86::VSQRTPSZ256mk:
9546 case X86::VSQRTPSZ256mkz:
9547 case X86::VSQRTPSZ256r:
9548 case X86::VSQRTPSZ256rk:
9549 case X86::VSQRTPSZ256rkz:
9550 case X86::VSQRTPSZm:
9551 case X86::VSQRTPSZmb:
9552 case X86::VSQRTPSZmbk:
9553 case X86::VSQRTPSZmbkz:
9554 case X86::VSQRTPSZmk:
9555 case X86::VSQRTPSZmkz:
9556 case X86::VSQRTPSZr:
9557 case X86::VSQRTPSZrb:
9558 case X86::VSQRTPSZrbk:
9559 case X86::VSQRTPSZrbkz:
9560 case X86::VSQRTPSZrk:
9561 case X86::VSQRTPSZrkz:
9562 case X86::VSQRTSDZm:
9563 case X86::VSQRTSDZm_Int:
9564 case X86::VSQRTSDZm_Intk:
9565 case X86::VSQRTSDZm_Intkz:
9566 case X86::VSQRTSDZr:
9567 case X86::VSQRTSDZr_Int:
9568 case X86::VSQRTSDZr_Intk:
9569 case X86::VSQRTSDZr_Intkz:
9570 case X86::VSQRTSDZrb_Int:
9571 case X86::VSQRTSDZrb_Intk:
9572 case X86::VSQRTSDZrb_Intkz:
9573 case X86::VSQRTSSZm:
9574 case X86::VSQRTSSZm_Int:
9575 case X86::VSQRTSSZm_Intk:
9576 case X86::VSQRTSSZm_Intkz:
9577 case X86::VSQRTSSZr:
9578 case X86::VSQRTSSZr_Int:
9579 case X86::VSQRTSSZr_Intk:
9580 case X86::VSQRTSSZr_Intkz:
9581 case X86::VSQRTSSZrb_Int:
9582 case X86::VSQRTSSZrb_Intk:
9583 case X86::VSQRTSSZrb_Intkz:
9585 case X86::VGATHERDPDYrm:
9586 case X86::VGATHERDPDZ128rm:
9587 case X86::VGATHERDPDZ256rm:
9588 case X86::VGATHERDPDZrm:
9589 case X86::VGATHERDPDrm:
9590 case X86::VGATHERDPSYrm:
9591 case X86::VGATHERDPSZ128rm:
9592 case X86::VGATHERDPSZ256rm:
9593 case X86::VGATHERDPSZrm:
9594 case X86::VGATHERDPSrm:
9595 case X86::VGATHERPF0DPDm:
9596 case X86::VGATHERPF0DPSm:
9597 case X86::VGATHERPF0QPDm:
9598 case X86::VGATHERPF0QPSm:
9599 case X86::VGATHERPF1DPDm:
9600 case X86::VGATHERPF1DPSm:
9601 case X86::VGATHERPF1QPDm:
9602 case X86::VGATHERPF1QPSm:
9603 case X86::VGATHERQPDYrm:
9604 case X86::VGATHERQPDZ128rm:
9605 case X86::VGATHERQPDZ256rm:
9606 case X86::VGATHERQPDZrm:
9607 case X86::VGATHERQPDrm:
9608 case X86::VGATHERQPSYrm:
9609 case X86::VGATHERQPSZ128rm:
9610 case X86::VGATHERQPSZ256rm:
9611 case X86::VGATHERQPSZrm:
9612 case X86::VGATHERQPSrm:
9613 case X86::VPGATHERDDYrm:
9614 case X86::VPGATHERDDZ128rm:
9615 case X86::VPGATHERDDZ256rm:
9616 case X86::VPGATHERDDZrm:
9617 case X86::VPGATHERDDrm:
9618 case X86::VPGATHERDQYrm:
9619 case X86::VPGATHERDQZ128rm:
9620 case X86::VPGATHERDQZ256rm:
9621 case X86::VPGATHERDQZrm:
9622 case X86::VPGATHERDQrm:
9623 case X86::VPGATHERQDYrm:
9624 case X86::VPGATHERQDZ128rm:
9625 case X86::VPGATHERQDZ256rm:
9626 case X86::VPGATHERQDZrm:
9627 case X86::VPGATHERQDrm:
9628 case X86::VPGATHERQQYrm:
9629 case X86::VPGATHERQQZ128rm:
9630 case X86::VPGATHERQQZ256rm:
9631 case X86::VPGATHERQQZrm:
9632 case X86::VPGATHERQQrm:
9633 case X86::VSCATTERDPDZ128mr:
9634 case X86::VSCATTERDPDZ256mr:
9635 case X86::VSCATTERDPDZmr:
9636 case X86::VSCATTERDPSZ128mr:
9637 case X86::VSCATTERDPSZ256mr:
9638 case X86::VSCATTERDPSZmr:
9639 case X86::VSCATTERPF0DPDm:
9640 case X86::VSCATTERPF0DPSm:
9641 case X86::VSCATTERPF0QPDm:
9642 case X86::VSCATTERPF0QPSm:
9643 case X86::VSCATTERPF1DPDm:
9644 case X86::VSCATTERPF1DPSm:
9645 case X86::VSCATTERPF1QPDm:
9646 case X86::VSCATTERPF1QPSm:
9647 case X86::VSCATTERQPDZ128mr:
9648 case X86::VSCATTERQPDZ256mr:
9649 case X86::VSCATTERQPDZmr:
9650 case X86::VSCATTERQPSZ128mr:
9651 case X86::VSCATTERQPSZ256mr:
9652 case X86::VSCATTERQPSZmr:
9653 case X86::VPSCATTERDDZ128mr:
9654 case X86::VPSCATTERDDZ256mr:
9655 case X86::VPSCATTERDDZmr:
9656 case X86::VPSCATTERDQZ128mr:
9657 case X86::VPSCATTERDQZ256mr:
9658 case X86::VPSCATTERDQZmr:
9659 case X86::VPSCATTERQDZ128mr:
9660 case X86::VPSCATTERQDZ256mr:
9661 case X86::VPSCATTERQDZmr:
9662 case X86::VPSCATTERQQZ128mr:
9663 case X86::VPSCATTERQQZ256mr:
9664 case X86::VPSCATTERQQZmr:
9674 unsigned UseIdx) const {
9681 Inst.getNumDefs() <= 2 && "Reassociation needs binary operators");
9691 assert((Inst.getNumDefs() == 1 || FlagDef) && "Implicit def isn't flags?");
9692 if (FlagDef && !FlagDef->isDead())
9703 bool Invert) const {
9755 case X86::VPANDDZ128rr:
9756 case X86::VPANDDZ256rr:
9757 case X86::VPANDDZrr:
9758 case X86::VPANDQZ128rr:
9759 case X86::VPANDQZ256rr:
9760 case X86::VPANDQZrr:
9763 case X86::VPORDZ128rr:
9764 case X86::VPORDZ256rr:
9766 case X86::VPORQZ128rr:
9767 case X86::VPORQZ256rr:
9771 case X86::VPXORDZ128rr:
9772 case X86::VPXORDZ256rr:
9773 case X86::VPXORDZrr:
9774 case X86::VPXORQZ128rr:
9775 case X86::VPXORQZ256rr:
9776 case X86::VPXORQZrr:
9779 case X86::VANDPDYrr:
9780 case X86::VANDPSYrr:
9781 case X86::VANDPDZ128rr:
9782 case X86::VANDPSZ128rr:
9783 case X86::VANDPDZ256rr:
9784 case X86::VANDPSZ256rr:
9785 case X86::VANDPDZrr:
9786 case X86::VANDPSZrr:
9791 case X86::VORPDZ128rr:
9792 case X86::VORPSZ128rr:
9793 case X86::VORPDZ256rr:
9794 case X86::VORPSZ256rr:
9799 case X86::VXORPDYrr:
9800 case X86::VXORPSYrr:
9801 case X86::VXORPDZ128rr:
9802 case X86::VXORPSZ128rr:
9803 case X86::VXORPDZ256rr:
9804 case X86::VXORPSZ256rr:
9805 case X86::VXORPDZrr:
9806 case X86::VXORPSZrr:
9827 case X86::VPADDBYrr:
9828 case X86::VPADDWYrr:
9829 case X86::VPADDDYrr:
9830 case X86::VPADDQYrr:
9831 case X86::VPADDBZ128rr:
9832 case X86::VPADDWZ128rr:
9833 case X86::VPADDDZ128rr:
9834 case X86::VPADDQZ128rr:
9835 case X86::VPADDBZ256rr:
9836 case X86::VPADDWZ256rr:
9837 case X86::VPADDDZ256rr:
9838 case X86::VPADDQZ256rr:
9839 case X86::VPADDBZrr:
9840 case X86::VPADDWZrr:
9841 case X86::VPADDDZrr:
9842 case X86::VPADDQZrr:
9843 case X86::VPMULLWrr:
9844 case X86::VPMULLWYrr:
9845 case X86::VPMULLWZ128rr:
9846 case X86::VPMULLWZ256rr:
9847 case X86::VPMULLWZrr:
9848 case X86::VPMULLDrr:
9849 case X86::VPMULLDYrr:
9850 case X86::VPMULLDZ128rr:
9851 case X86::VPMULLDZ256rr:
9852 case X86::VPMULLDZrr:
9853 case X86::VPMULLQZ128rr:
9854 case X86::VPMULLQZ256rr:
9855 case X86::VPMULLQZrr:
9856 case X86::VPMAXSBrr:
9857 case X86::VPMAXSBYrr:
9858 case X86::VPMAXSBZ128rr:
9859 case X86::VPMAXSBZ256rr:
9860 case X86::VPMAXSBZrr:
9861 case X86::VPMAXSDrr:
9862 case X86::VPMAXSDYrr:
9863 case X86::VPMAXSDZ128rr:
9864 case X86::VPMAXSDZ256rr:
9865 case X86::VPMAXSDZrr:
9866 case X86::VPMAXSQZ128rr:
9867 case X86::VPMAXSQZ256rr:
9868 case X86::VPMAXSQZrr:
9869 case X86::VPMAXSWrr:
9870 case X86::VPMAXSWYrr:
9871 case X86::VPMAXSWZ128rr:
9872 case X86::VPMAXSWZ256rr:
9873 case X86::VPMAXSWZrr:
9874 case X86::VPMAXUBrr:
9875 case X86::VPMAXUBYrr:
9876 case X86::VPMAXUBZ128rr:
9877 case X86::VPMAXUBZ256rr:
9878 case X86::VPMAXUBZrr:
9879 case X86::VPMAXUDrr:
9880 case X86::VPMAXUDYrr:
9881 case X86::VPMAXUDZ128rr:
9882 case X86::VPMAXUDZ256rr:
9883 case X86::VPMAXUDZrr:
9884 case X86::VPMAXUQZ128rr:
9885 case X86::VPMAXUQZ256rr:
9886 case X86::VPMAXUQZrr:
9887 case X86::VPMAXUWrr:
9888 case X86::VPMAXUWYrr:
9889 case X86::VPMAXUWZ128rr:
9890 case X86::VPMAXUWZ256rr:
9891 case X86::VPMAXUWZrr:
9892 case X86::VPMINSBrr:
9893 case X86::VPMINSBYrr:
9894 case X86::VPMINSBZ128rr:
9895 case X86::VPMINSBZ256rr:
9896 case X86::VPMINSBZrr:
9897 case X86::VPMINSDrr:
9898 case X86::VPMINSDYrr:
9899 case X86::VPMINSDZ128rr:
9900 case X86::VPMINSDZ256rr:
9901 case X86::VPMINSDZrr:
9902 case X86::VPMINSQZ128rr:
9903 case X86::VPMINSQZ256rr:
9904 case X86::VPMINSQZrr:
9905 case X86::VPMINSWrr:
9906 case X86::VPMINSWYrr:
9907 case X86::VPMINSWZ128rr:
9908 case X86::VPMINSWZ256rr:
9909 case X86::VPMINSWZrr:
9910 case X86::VPMINUBrr:
9911 case X86::VPMINUBYrr:
9912 case X86::VPMINUBZ128rr:
9913 case X86::VPMINUBZ256rr:
9914 case X86::VPMINUBZrr:
9915 case X86::VPMINUDrr:
9916 case X86::VPMINUDYrr:
9917 case X86::VPMINUDZ128rr:
9918 case X86::VPMINUDZ256rr:
9919 case X86::VPMINUDZrr:
9920 case X86::VPMINUQZ128rr:
9921 case X86::VPMINUQZ256rr:
9922 case X86::VPMINUQZrr:
9923 case X86::VPMINUWrr:
9924 case X86::VPMINUWYrr:
9925 case X86::VPMINUWZ128rr:
9926 case X86::VPMINUWZ256rr:
9927 case X86::VPMINUWZrr:
9939 case X86::VMAXCPDrr:
9940 case X86::VMAXCPSrr:
9941 case X86::VMAXCPDYrr:
9942 case X86::VMAXCPSYrr:
9943 case X86::VMAXCPDZ128rr:
9944 case X86::VMAXCPSZ128rr:
9945 case X86::VMAXCPDZ256rr:
9946 case X86::VMAXCPSZ256rr:
9947 case X86::VMAXCPDZrr:
9948 case X86::VMAXCPSZrr:
9949 case X86::VMAXCSDrr:
9950 case X86::VMAXCSSrr:
9951 case X86::VMAXCSDZrr:
9952 case X86::VMAXCSSZrr:
9953 case X86::VMINCPDrr:
9954 case X86::VMINCPSrr:
9955 case X86::VMINCPDYrr:
9956 case X86::VMINCPSYrr:
9957 case X86::VMINCPDZ128rr:
9958 case X86::VMINCPSZ128rr:
9959 case X86::VMINCPDZ256rr:
9960 case X86::VMINCPSZ256rr:
9961 case X86::VMINCPDZrr:
9962 case X86::VMINCPSZrr:
9963 case X86::VMINCSDrr:
9964 case X86::VMINCSSrr:
9965 case X86::VMINCSDZrr:
9966 case X86::VMINCSSZrr:
9967 case X86::VMAXCPHZ128rr:
9968 case X86::VMAXCPHZ256rr:
9969 case X86::VMAXCPHZrr:
9970 case X86::VMAXCSHZrr:
9971 case X86::VMINCPHZ128rr:
9972 case X86::VMINCPHZ256rr:
9973 case X86::VMINCPHZrr:
9974 case X86::VMINCSHZrr:
9986 case X86::VADDPDYrr:
9987 case X86::VADDPSYrr:
9988 case X86::VADDPDZ128rr:
9989 case X86::VADDPSZ128rr:
9990 case X86::VADDPDZ256rr:
9991 case X86::VADDPSZ256rr:
9992 case X86::VADDPDZrr:
9993 case X86::VADDPSZrr:
9996 case X86::VADDSDZrr:
9997 case X86::VADDSSZrr:
10000 case X86::VMULPDYrr:
10001 case X86::VMULPSYrr:
10002 case X86::VMULPDZ128rr:
10003 case X86::VMULPSZ128rr:
10004 case X86::VMULPDZ256rr:
10005 case X86::VMULPSZ256rr:
10006 case X86::VMULPDZrr:
10007 case X86::VMULPSZrr:
10008 case X86::VMULSDrr:
10009 case X86::VMULSSrr:
10010 case X86::VMULSDZrr:
10011 case X86::VMULSSZrr:
10012 case X86::VADDPHZ128rr:
10013 case X86::VADDPHZ256rr:
10014 case X86::VADDPHZrr:
10015 case X86::VADDSHZrr:
10016 case X86::VMULPHZ128rr:
10017 case X86::VMULPHZ256rr:
10018 case X86::VMULPHZrr:
10019 case X86::VMULSHZrr:
10030static std::optional<ParamLoadedValue>
10033 Register DestReg = MI.getOperand(0).getReg();
10034 Register SrcReg = MI.getOperand(1).getReg();
10039 if (DestReg == DescribedReg)
10044 if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) {
10045 Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
10055 if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr ||
10056 !TRI->isSuperRegister(DestReg, DescribedReg))
10057 return std::nullopt;
10059 assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case");
10063std::optional<ParamLoadedValue>
10070 switch (MI.getOpcode()) {
10073 case X86::LEA64_32r: {
10075 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10076 return std::nullopt;
10080 if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
10081 return std::nullopt;
10090 if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) ||
10091 Op2.getReg() == MI.getOperand(0).getReg())
10092 return std::nullopt;
10093 else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister &&
10094 TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) ||
10095 (Op2.getReg() != X86::NoRegister &&
10096 TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg())))
10097 return std::nullopt;
10099 int64_t Coef = MI.getOperand(2).getImm();
10100 int64_t Offset = MI.getOperand(4).getImm();
10103 if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) {
10105 } else if (Op1.isFI())
10108 if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) {
10113 if (Op && Op2.getReg() != X86::NoRegister) {
10114 int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false);
10116 return std::nullopt;
10117 else if (dwarfReg < 32) {
10118 Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg);
10137 if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) &&
10138 Op2.getReg() != X86::NoRegister) {
10151 return std::nullopt;
10154 case X86::MOV64ri32:
10157 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10158 return std::nullopt;
10165 case X86::XOR32rr: {
10168 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10169 return std::nullopt;
10170 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
10172 return std::nullopt;
10174 case X86::MOVSX64rr32: {
10181 if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg))
10182 return std::nullopt;
10191 if (Reg == MI.getOperand(0).getReg())
10194 assert(X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg) &&
10195 "Unhandled sub-register case for MOVSX64rr32");
10200 assert(!MI.isMoveImmediate() && "Unexpected MoveImm instruction");
10217 assert(!OldFlagDef1 == !OldFlagDef2 &&
10218 "Unexpected instruction type for reassociation");
10220 if (!OldFlagDef1 || !OldFlagDef2)
10224 "Must have dead EFLAGS operand in reassociable instruction");
10231 assert(NewFlagDef1 && NewFlagDef2 &&
10232 "Unexpected operand in reassociable instruction");
10242std::pair<unsigned, unsigned>
10244 return std::make_pair(TF, 0u);
10249 using namespace X86II;
10250 static const std::pair<unsigned, const char *> TargetFlags[] = {
10251 {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
10252 {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
10253 {MO_GOT, "x86-got"},
10254 {MO_GOTOFF, "x86-gotoff"},
10255 {MO_GOTPCREL, "x86-gotpcrel"},
10256 {MO_GOTPCREL_NORELAX, "x86-gotpcrel-norelax"},
10257 {MO_PLT, "x86-plt"},
10258 {MO_TLSGD, "x86-tlsgd"},
10259 {MO_TLSLD, "x86-tlsld"},
10260 {MO_TLSLDM, "x86-tlsldm"},
10261 {MO_GOTTPOFF, "x86-gottpoff"},
10262 {MO_INDNTPOFF, "x86-indntpoff"},
10263 {MO_TPOFF, "x86-tpoff"},
10264 {MO_DTPOFF, "x86-dtpoff"},
10265 {MO_NTPOFF, "x86-ntpoff"},
10266 {MO_GOTNTPOFF, "x86-gotntpoff"},
10267 {MO_DLLIMPORT, "x86-dllimport"},
10268 {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
10269 {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
10270 {MO_TLVP, "x86-tlvp"},
10271 {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
10272 {MO_SECREL, "x86-secrel"},
10273 {MO_COFFSTUB, "x86-coffstub"}};
10290 if (!TM->isPositionIndependent())
10297 if (GlobalBaseReg == 0)
10309 PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
10311 PC = GlobalBaseReg;
10313 if (STI.is64Bit()) {
10366 StringRef getPassName() const override {
10367 return "X86 PIC Global Base Reg Initialization";
10396 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
10407 bool Changed = false;
10412 switch (I->getOpcode()) {
10413 case X86::TLS_base_addr32:
10414 case X86::TLS_base_addr64:
10415 if (TLSBaseAddrReg)
10416 I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
10418 I = SetRegister(*I, &TLSBaseAddrReg);
10427 for (auto &I : *Node) {
10428 Changed |= VisitNode(I, TLSBaseAddrReg);
10437 unsigned TLSBaseAddrReg) {
10440 const bool is64Bit = STI.is64Bit();
10446 TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
10447 .addReg(TLSBaseAddrReg);
10450 I.eraseFromParent();
10460 const bool is64Bit = STI.is64Bit();
10465 *TLSBaseAddrReg = RegInfo.createVirtualRegister(
10466 is64Bit ? &X86::GR64RegClass : &X86::GR32RegClass);
10471 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
10477 StringRef getPassName() const override {
10478 return "Local Dynamic TLS Access Clean-up";
10489char LDTLSCleanup::ID = 0;
10491 return new LDTLSCleanup();
10524std::optional<outliner::OutlinedFunction>
10527 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
10528 unsigned SequenceSize = 0;
10529 for (auto &MI : RepeatedSequenceLocs[0]) {
10533 if (MI.isDebugInstr() || MI.isKill())
10540 unsigned CFICount = 0;
10541 for (auto &I : RepeatedSequenceLocs[0]) {
10542 if (I.isCFIInstruction())
10552 std::vector<MCCFIInstruction> CFIInstructions =
10553 C.getMF()->getFrameInstructions();
10555 if (CFICount > 0 && CFICount != CFIInstructions.size())
10556 return std::nullopt;
10560 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10571 return std::nullopt;
10595 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10605 unsigned Flags) const {
10609 if (MI.isTerminator())
10623 if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
10624 MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
10625 MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
10629 if (MI.readsRegister(X86::RIP, &RI) ||
10630 MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
10631 MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
10635 if (MI.isCFIInstruction())
10661 .addGlobalAddress(M.getNamedValue(MF.getName())));
10665 .addGlobalAddress(M.getNamedValue(MF.getName())));
10674 bool AllowSideEffects) const {
10679 if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
10683 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10688 if (!AllowSideEffects)
10695 } else if (X86::VR128RegClass.contains(Reg)) {
10704 } else if (X86::VR256RegClass.contains(Reg)) {
10713 } else if (X86::VR512RegClass.contains(Reg)) {
10715 if (!ST.hasAVX512())
10722 } else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
10724 X86::VK16RegClass.contains(Reg)) {
10729 unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
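// Editorial note: register clearing picks a dependency-breaking zero idiom per
// register class: XOR for GPRs, VPXOR/VXORPS for vector registers, and a mask
// self-XOR for AVX-512 predicate registers, e.g. (assuming BWI is available):
//   $k1 = KXORQrr undef $k1, undef $k1   ; k1 = 0 without reading k1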
10738 bool DoRegPressureReduce) const {
10741 case X86::VPDPWSSDrr:
10742 case X86::VPDPWSSDrm:
10743 case X86::VPDPWSSDYrr:
10744 case X86::VPDPWSSDYrm: {
10745 if (!Subtarget.hasFastDPWSSD()) {
10751 case X86::VPDPWSSDZ128r:
10752 case X86::VPDPWSSDZ128m:
10753 case X86::VPDPWSSDZ256r:
10754 case X86::VPDPWSSDZ256m:
10755 case X86::VPDPWSSDZr:
10756 case X86::VPDPWSSDZm: {
10757 if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10765 Patterns, DoRegPressureReduce);
10777 unsigned AddOpc = 0;
10778 unsigned MaddOpc = 0;
10781 assert(false && "It should not reach here");
10787 case X86::VPDPWSSDrr:
10788 MaddOpc = X86::VPMADDWDrr;
10789 AddOpc = X86::VPADDDrr;
10791 case X86::VPDPWSSDrm:
10792 MaddOpc = X86::VPMADDWDrm;
10793 AddOpc = X86::VPADDDrr;
10795 case X86::VPDPWSSDZ128r:
10796 MaddOpc = X86::VPMADDWDZ128rr;
10797 AddOpc = X86::VPADDDZ128rr;
10799 case X86::VPDPWSSDZ128m:
10800 MaddOpc = X86::VPMADDWDZ128rm;
10801 AddOpc = X86::VPADDDZ128rr;
10807 case X86::VPDPWSSDYrr:
10808 MaddOpc = X86::VPMADDWDYrr;
10809 AddOpc = X86::VPADDDYrr;
10811 case X86::VPDPWSSDYrm:
10812 MaddOpc = X86::VPMADDWDYrm;
10813 AddOpc = X86::VPADDDYrr;
10815 case X86::VPDPWSSDZ256r:
10816 MaddOpc = X86::VPMADDWDZ256rr;
10817 AddOpc = X86::VPADDDZ256rr;
10819 case X86::VPDPWSSDZ256m:
10820 MaddOpc = X86::VPMADDWDZ256rm;
10821 AddOpc = X86::VPADDDZ256rr;
10827 case X86::VPDPWSSDZr:
10828 MaddOpc = X86::VPMADDWDZrr;
10829 AddOpc = X86::VPADDDZrr;
10831 case X86::VPDPWSSDZm:
10832 MaddOpc = X86::VPMADDWDZrm;
10833 AddOpc = X86::VPADDDZrr;
10845 InstrIdxForVirtReg.insert(std::make_pair(NewReg, 0));
10867 DelInstrs, InstrIdxForVirtReg);
10871 InstrIdxForVirtReg);
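// Editorial note: on subtargets without fast VPDPWSSD the machine combiner
// splits the fused dot-product-accumulate into its two-instruction equivalent:
//   vpdpwssd %dst, %a, %b      ; dst += dot(a, b)
// becomes
//   vpmaddwd %tmp, %a, %b      ; tmp = dot(a, b)
//   vpaddd   %dst, %dst, %tmp  ; dst += tmp
// which exposes the final add for further reassociation.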
10881 M.Base.FrameIndex = FI;
10882 M.getFullAddress(Ops);
10885#define GET_INSTRINFO_HELPERS
10886#include "X86GenInstrInfo.inc"
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
Expand a single-def pseudo instruction to a two-addr instruction with two undef reads of the register...
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
#define FROM_TO(FROM, TO)
static bool is64Bit(const char *name)
#define GET_EGPR_IF_ENABLED(OPC)
static bool isLEA(unsigned Opcode)
static void addOperands(MachineInstrBuilder &MIB, ArrayRef< MachineOperand > MOs, int PtrOffset=0)
static std::optional< ParamLoadedValue > describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetRegisterInfo *TRI)
If DescribedReg overlaps with the MOVrr instruction's destination register then, if possible,...
static cl::opt< unsigned > PartialRegUpdateClearance("partial-reg-update-clearance", cl::desc("Clearance between two register writes " "for inserting XOR to avoid partial " "register update"), cl::init(64), cl::Hidden)
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI)
static bool isConvertibleLEA(MachineInstr *MI)
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, const X86Subtarget &Subtarget)
static bool isAMXOpcode(unsigned Opc)
static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI, Register Reg)
static void updateOperandRegConstraints(MachineFunction &MF, MachineInstr &NewMI, const TargetInstrInfo &TII)
static bool findRedundantFlagInstr(MachineInstr &CmpInstr, MachineInstr &CmpValDefInstr, const MachineRegisterInfo *MRI, MachineInstr **AndInstr, const TargetRegisterInfo *TRI, bool &NoSignFlag, bool &ClearsOverflowFlag)
static int getJumpTableIndexFromAddr(const MachineInstr &MI)
static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, unsigned NewWidth, unsigned *pNewMask=nullptr)
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne)
static unsigned getNewOpcFromTable(ArrayRef< X86TableEntry > Table, unsigned Opc)
static unsigned getStoreRegOpcode(Register SrcReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
#define FOLD_BROADCAST(SIZE)
static cl::opt< unsigned > UndefRegClearance("undef-reg-clearance", cl::desc("How many idle instructions we would like before " "certain undef register reads"), cl::init(128), cl::Hidden)
#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64)
static bool isTruncatedShiftCountForLEA(unsigned ShAmt)
Check whether the given shift count is appropriate can be represented by a LEA instruction.
static cl::opt< bool > ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden)
static SmallVector< MachineMemOperand *, 2 > extractLoadMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static MachineInstr * fuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII)
static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx)
static bool canConvert2Copy(unsigned Opc)
static cl::opt< bool > NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions"), cl::Hidden)
static bool expandNOVLXStore(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx)
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes)
static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc, Register Reg)
Expand a single-def pseudo instruction to a two-addr instruction with two k0 reads.
#define VPERM_CASES_BROADCAST(Suffix)
static X86::CondCode isUseDefConvertible(const MachineInstr &MI)
Check whether the use can be converted to remove a comparison against zero.
static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
static unsigned getLoadRegOpcode(Register DestReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, bool ForLoadFold=false)
static MachineInstr * makeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI)
#define GET_ND_IF_ENABLED(OPC)
static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget, bool ForLoadFold=false)
Return true for all instructions that only update the first 32 or 64-bits of the destination register...
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, const X86Subtarget &Subtarget)
static const uint16_t * lookupAVX512(unsigned opcode, unsigned domain, ArrayRef< uint16_t[4]> Table)
static unsigned getLoadStoreRegOpcode(Register Reg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI, bool Load)
#define VPERM_CASES(Suffix)
#define FROM_TO_SIZE(A, B, S)
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes)
static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, bool &ClearsOverflowFlag)
Check whether the definition can be converted to remove a comparison against zero.
static bool isHReg(unsigned Reg)
Test if the given register is a physical h register.
static MachineInstr * fuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII, int PtrOffset=0)
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode)
static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static MachineBasicBlock * getFallThroughMBB(MachineBasicBlock *MBB, MachineBasicBlock *TBB)
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineInstr &UserMI, const MachineFunction &MF)
Check if LoadMI is a partial register load that we can't fold into MI because the latter uses content...
static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI)
static cl::opt< bool > PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to" " fuse, but the X86 backend currently can't"), cl::Hidden)
static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx)
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
This determines which of three possible cases of a three source commute the source indexes correspond...
static bool isX87Reg(unsigned Reg)
Return true if the Reg is X87 register.
static void genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
static unsigned getTruncatedShiftCount(const MachineInstr &MI, unsigned ShiftAmtOperandIdx)
Check whether the shift count for a machine operand is non-zero.
static SmallVector< MachineMemOperand *, 2 > extractStoreMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static unsigned getBroadcastOpcode(const X86FoldTableEntry *I, const TargetRegisterClass *RC, const X86Subtarget &STI)
static unsigned convertALUrr2ALUri(unsigned Opc)
Convert an ALUrr opcode to corresponding ALUri opcode.
static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI)
Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
static bool isCommutableVPERMV3Instruction(unsigned Opcode)
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
static DIExpression * appendExt(const DIExpression *Expr, unsigned FromSize, unsigned ToSize, bool Signed)
Append a zero- or sign-extension to Expr.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Base class for the actual dominator tree node.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
LiveInterval - This class represents the liveness of a register, or stack slot.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
void stepForward(const MachineInstr &MI, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand * > > &Clobbers)
Simulates liveness when stepping forward over an instruction(bundle).
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
bool usesWindowsCFI() const
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Instances of this class represent a single low-level machine instruction.
void setOpcode(unsigned Op)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
Wrapper class representing physical registers. Should be passed by value.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned pred_size() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
@ LQR_Dead
Register is known to be fully dead.
This class is a data container for one entry in a MachineConstantPool.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
union llvm::MachineConstantPoolEntry::@204 Val
The constant itself.
const Constant * ConstVal
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * getRootNode() const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has no parent, prev, or next.
const MachineBasicBlock & front() const
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
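Taken together, the builder methods above compose like this (a sketch; MBB, InsertPt, DL, TII, DstReg, and SrcReg are assumed to be in scope):

  // DstReg = ADD32ri SrcReg, 42; BuildMI adds the implicit EFLAGS def.
  MachineInstr *NewMI =
      BuildMI(MBB, InsertPt, DL, TII->get(X86::ADD32ri), DstReg)
          .addReg(SrcReg)
          .addImm(42)
          .getInstr();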
Representation of each machine instruction.
mop_iterator operands_begin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
void dropDebugNumber()
Drop any variable location debugging information associated with this instruction.
void setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just prior to the instruction itself.
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
bool isSafeToMove(bool &SawStore) const
Return true if it is safe to move this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully defines or partially defines) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
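A sketch of the common pseudo-expansion pattern built on setDesc and addOperand (MI, MF, and TII are assumed; NewOpc is a hypothetical opcode that must accept MI's existing operands):

  MI.setDesc(TII->get(NewOpc));                    // Rewrite the opcode in place.
  MI.addOperand(MF, MachineOperand::CreateImm(0)); // Append a trailing immediate.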
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessary.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void setFlag(MIFlag Flag)
Set a MI flag.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
unsigned getNumDefs() const
Returns the total number of definitions.
void setDebugLoc(DebugLoc DL)
Replace the current source location information with the given one.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an index.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).
static MachineOperand CreateCPI(unsigned Idx, int Offset, unsigned TargetFlags=0)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
static MachineOperand CreateFI(int Idx)
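For example, a sketch of rewriting an immediate operand into a register use (MO and NewReg are assumed; the kill flag here is only illustrative):

  if (MO.isImm())
    MO.ChangeToRegister(NewReg, /*isDef=*/false, /*isImp=*/false,
                        /*isKill=*/true);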
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
const TargetRegisterInfo * getTargetRegisterInfo() const
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class.
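A sketch of the usual pairing of these two calls (MF assumed):

  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register Tmp = MRI.createVirtualRegister(&X86::GR32RegClass);
  // Later, narrow Tmp if an instruction imposes a tighter constraint:
  MRI.constrainRegClass(Tmp, &X86::GR32_NOSPRegClass);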
A Module instance is used to store all the information related to an LLVM module.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation suitable for instruction selection.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
MachineFunction & getMachineFunction() const
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const
Return true when \P Inst has reassociable operands in the same \P MBB.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could replace the original code sequence.
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target specify whether the instruction is actually trivially rematerializable, taking into consideration its operands.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum, or NULL.
bool isPositionIndependent() const
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TypeSize getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
SlotIndex def
The index of the defining instruction.
LLVM Value Representation.
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack function) that is 128 bytes.
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void getFrameIndexOperands(SmallVectorImpl< MachineOperand > &Ops, int FI) const override
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
Check if there exists an earlier instruction that operates on the same source operands and sets eflag...
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Overrides the isSchedulingBoundary from Codegen/TargetInstrInfo.cpp to make it capable of identifying...
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
Given a machine instruction descriptor, returns the register class constraint for OpNum, or NULL.
void replaceBranchWithTailCall(MachineBasicBlock &MBB, SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the global base register value.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Returns true iff the routine could find two commutable operands in the given machine instruction.
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value loaded from memory or the value of any non-address register operands.
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
const X86RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override
Returns true if we have preference on the operands order in MI, the commute decision is returned in Commute.
bool hasLiveCondCodeDef(MachineInstr &MI) const
True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool canMakeTailCallConditional(SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
convertToThreeAddress - This method must be implemented by targets that set the M_CONVERTIBLE_TO_3_ADDR flag.
X86InstrInfo(X86Subtarget &STI)
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
MCInst getNop() const override
Return the noop instruction to use for a noop.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
This is used by the pre-regalloc scheduler to determine (in conjunction with areLoadsFromSameBasePtr) if two loads should be scheduled together.
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr * > &NewMIs) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
Fold a load or store of the specified stack slot into the specified machine instruction for the specified operand(s).
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds potential patterns, this function generates the instructions that could replace the original code sequence.
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isStoreToStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
bool isUnconditionalTailCall(const MachineInstr &MI) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isLoadFromStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const
int getSPAdjust(const MachineInstr &MI) const override
getSPAdjust - This returns the stack pointer adjustment made by this instruction.
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
int getJumpTableIndex(const MachineInstr &MI) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override
This is an architecture-specific helper function of reassociateOps.
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
isCoalescableExtInstr - Return true if the instruction is a "coalescable" extension instruction.
void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Opc, Register Reg, int FrameIdx, bool isKill=false) const
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned LEAOpcode, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV, LiveIntervals *LIS) const
Given an operand within a MachineInstr, insert preceding code to put it into the right format for a particular kind of LEA instruction.
bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify=false) const override
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value of any of its register operands.
unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before certain undef register reads.
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override
uint16_t getExecutionDomainCustom(const MachineInstr &MI) const
bool isHighLatencyDef(int opc) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immediate into the use instruction.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, const X86InstrFMA3Group &FMA3Group) const
Returns an adjusted FMA opcode that must be used in an FMA instruction that performs the same computations as the given MI with the operands SrcOpIdx1 and SrcOpIdx2 commuted.
bool preservesZeroValueInReg(const MachineInstr *MI, const Register NullValueReg, const TargetRegisterInfo *TRI) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before a partial register update.
MachineInstr * optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, Register &FoldAsLoadDefReg, MachineInstr *&DefMI) const override
Try to remove the load by folding it to a register operand at the use.
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-specific information for each MachineFunction.
Register getGlobalBaseReg() const
int getTCReturnAddrDelta() const
void setGlobalBaseReg(Register Reg)
unsigned getNumLocalDynamicTLSAccesses() const
bool getUsesRedZone() const
bool canRealignStack(const MachineFunction &MF) const override
bool isPICStyleGOT() const
bool isTargetWin64() const
const X86InstrInfo * getInstrInfo() const override
const X86RegisterInfo * getRegisterInfo() const override
const X86FrameLowering * getFrameLowering() const override
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ X86
Windows x64, Windows Itanium (IA-64)
Reg
All possible values of the reg field in the ModR/M byte.
bool isKMergeMasked(uint64_t TSFlags)
@ MO_GOT_ABSOLUTE_ADDRESS
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [.
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the G...
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
bool canUseApxExtendedReg(const MCInstrDesc &Desc)
bool isPseudo(uint64_t TSFlags)
bool isKMasked(uint64_t TSFlags)
int getMemoryOperandNo(uint64_t TSFlags)
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
@ EVEX
EVEX - Specifies that this instruction uses EVEX form which provides syntax support for up to 32 512-bit registers.
@ SSEDomainShift
Execution domain for SSE instructions.
CondCode getCondFromBranch(const MachineInstr &MI)
CondCode getCondFromCFCMov(const MachineInstr &MI)
CondCode getCondFromMI(const MachineInstr &MI)
Return the condition code of the instruction.
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference, or -1 if it has none.
unsigned getSwappedVCMPImm(unsigned Imm)
Get the VCMP immediate if the opcodes are swapped.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g. turning COND_E to COND_NE.
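For instance (a trivial sketch):

  // COND_E (equal) inverts to COND_NE (not equal).
  X86::CondCode Inv = X86::GetOppositeBranchCondition(X86::COND_E);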
unsigned getSwappedVPCOMImm(unsigned Imm)
Get the VPCOM immediate if the opcodes are swapped.
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is an X87 instruction.
unsigned getNonNDVariant(unsigned Opc)
unsigned getVPCMPImmForCond(ISD::CondCode CC)
Get the VPCMP immediate for the given condition.
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should be swapped.
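A sketch of consuming the returned pair (Pred is an assumed CmpInst::Predicate):

  // NeedSwap says whether the compare operands must be swapped.
  auto [CC, NeedSwap] = X86::getX86ConditionCode(Pred);
  if (CC == X86::COND_INVALID)
    return false; // No direct mapping for this predicate.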
CondCode getCondFromSETCC(const MachineInstr &MI)
unsigned getSwappedVPCMPImm(unsigned Imm)
Get the VPCMP immediate if the opcodes are swapped.
CondCode getCondFromCCMP(const MachineInstr &MI)
int getCCMPCondFlagsFromCondCode(CondCode CC)
int getCondSrcNoFromDesc(const MCInstrDesc &MCID)
Return the source operand # for condition code by MCID.
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false, bool HasNDD=false)
Return a cmov opcode for the given register size in bytes, and operand type.
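For example (a sketch):

  // 32-bit register-form CMOVcc, no memory operand, no NDD encoding.
  unsigned CMovOpc = X86::getCMovOpcode(/*RegBytes=*/4);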
unsigned getNFVariant(unsigned Opc)
unsigned getVectorRegisterWidth(const MCOperandInfo &Info)
Get the width of the vector register operand.
CondCode getCondFromCMov(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
static bool isMem(const MachineInstr &MI, unsigned Op)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
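For example (a sketch; the size is in bits):

  MCRegister Sub = getX86SubSuperRegister(X86::EAX, 8); // Yields X86::AL.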
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FunctionPass * createX86GlobalBaseRegPass()
This pass initializes a global base register for PIC on x86-32.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
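A sketch of folding a frame reference into a load (MBB, InsertPt, DL, TII, and frame index FI are assumed):

  // EAX = MOV32rm <stack slot FI>; the helper appends the base, scale,
  // index, displacement, and segment operands for the frame index.
  addFrameReference(
      BuildMI(MBB, InsertPt, DL, TII->get(X86::MOV32rm), X86::EAX), FI);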
MaybeAlign getAlign(const Function &F, unsigned Index)
FunctionPass * createCleanupLocalDynamicTLSPass()
This pass combines multiple accesses to local-dynamic TLS variables so that the TLS base address for ...
const X86FoldTableEntry * lookupBroadcastFoldTable(unsigned RegOp, unsigned OpNum)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
const X86InstrFMA3Group * getFMA3Group(unsigned Opcode, uint64_t TSFlags)
Returns a reference to the group of FMA3 opcodes that the given Opcode belongs to.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset], i.e., one with no scale or index, but with a displacement.
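For example (a sketch; DestReg and BaseReg are assumed):

  // DestReg = LEA64r [BaseReg + 8].
  addRegOffset(BuildMI(MBB, InsertPt, DL, TII->get(X86::LEA64r), DestReg),
               BaseReg, /*isKill=*/false, 8);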
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
const X86FoldTableEntry * lookupTwoAddrFoldTable(unsigned RegOp)
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
unsigned getUndefRegState(bool B)
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
unsigned getDefRegState(bool B)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
const X86FoldTableEntry * lookupUnfoldTable(unsigned MemOp)
constexpr unsigned BitWidth
bool matchBroadcastSize(const X86FoldTableEntry &Entry, unsigned BroadcastBits)
const X86FoldTableEntry * lookupFoldTable(unsigned RegOp, unsigned OpNum)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
This represents a simple continuous liveness interval for a value.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstrs which are the last use of this virtual register (kill it) in their basic block.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
X86AddressMode - This struct holds a generalized full x86 address mode.
This class is used to group {132, 213, 231} forms of FMA opcodes together.
unsigned get213Opcode() const
Returns the 213 form of FMA opcode.
unsigned get231Opcode() const
Returns the 231 form of FMA opcode.
bool isIntrinsic() const
Returns true iff the group of FMA opcodes holds intrinsic opcodes.
unsigned get132Opcode() const
Returns the 132 form of FMA opcode.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.