#define DEBUG_TYPE "si-fold-operands"

  unsigned DefSubReg = AMDGPU::NoSubRegister;

  FoldableDef() = delete;

              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
      ImmToFold = FoldOp.getImm();
    } else if (FoldOp.isFI()) {
      FrameIndexToFold = FoldOp.getIndex();

              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),

    FoldableDef Copy(*this);
    Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);

    return OpToFold->getReg();

  unsigned getSubReg() const {
    return OpToFold->getSubReg();

    return FrameIndexToFold;

  std::optional<int64_t> getEffectiveImmVal() const {

                      unsigned OpIdx) const {
    std::optional<int64_t> ImmToFold = getEffectiveImmVal();

    if (DefSubReg != AMDGPU::NoSubRegister)

    if (DefSubReg != AMDGPU::NoSubRegister)
    return TII.isOperandLegal(MI, OpIdx, OpToFold);
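// FoldableDef (summarized from the fragments above): it wraps the value a
// fold candidate would substitute -- an immediate, frame index, global
// address, or register operand -- together with the defining register class
// and subregister index, so getEffectiveImmVal() and isOperandLegal() can be
// queried per use, and getWithSubReg() can narrow the def to the subregister
// a particular use reads.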
struct FoldCandidate {

                bool Commuted = false, int ShrinkOp = -1)
      : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
        Commuted(Commuted) {}

  bool isFI() const { return Def.isFI(); }

    return Def.FrameIndexToFold;

  bool isImm() const { return Def.isImm(); }

  bool isReg() const { return Def.isReg(); }

  bool isGlobal() const { return Def.isGlobal(); }

  bool needsShrink() const { return ShrinkOpcode != -1; }
class SIFoldOperandsImpl {

                          const FoldableDef &OpToFold) const;

  unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
    case AMDGPU::S_ADD_I32: {
      if (ST->hasAddNoCarry())
        return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
      return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
    case AMDGPU::S_OR_B32:
      return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
    case AMDGPU::S_AND_B32:
      return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
    case AMDGPU::S_MUL_I32:
      return AMDGPU::V_MUL_LO_U32_e64;
    return AMDGPU::INSTRUCTION_LIST_END;

  bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,

                            int64_t ImmVal) const;
                            int64_t ImmVal) const;
                          const FoldableDef &OpToFold) const;

  getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,

  std::pair<int64_t, const TargetRegisterClass *>

  bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;
  bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

  SIFoldOperandsImpl() = default;

    return SIFoldOperandsImpl().run(MF);

  StringRef getPassName() const override { return "SI Fold Operands"; }

char SIFoldOperandsLegacy::ID = 0;

          TRI.getSubRegisterClass(RC, MO.getSubReg()))
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F16_t16_e64:
    return AMDGPU::V_FMA_F16_gfx9_t16_e64;
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  return AMDGPU::INSTRUCTION_LIST_END;
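// This switch corresponds to the file's static macToMad() helper: each
// two-address MAC/FMAC opcode is mapped to its three-address MAD/FMA
// equivalent, so an operand that could not be folded into the tied
// accumulator form can still be folded after the rewrite.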
                                             const FoldableDef &OpToFold) const {
  if (!OpToFold.isFI())

  const unsigned Opc = UseMI.getOpcode();
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32:
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:
    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&

    return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);

  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
  return OpNo == VIdx && SIdx == -1;
bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
  if (TRI->isVGPR(*MRI, DstReg) && TRI->isSGPRReg(*MRI, SrcReg) &&
      MRI->hasOneNonDBGUse(SrcReg)) {
    MachineInstr *Def = MRI->getVRegDef(SrcReg);
    if (!Def || Def->getNumOperands() != 4)

    MachineOperand *Src0 = &Def->getOperand(1);
    MachineOperand *Src1 = &Def->getOperand(2);

    const bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
    unsigned NewOp = convertToVALUOp(Def->getOpcode(), UseVOP3);
    if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
        !Def->getOperand(3).isDead())

    MachineBasicBlock *MBB = Def->getParent();
    if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
      MachineInstrBuilder Add =
      if (Add->getDesc().getNumDefs() == 2) {
        Register CarryOutReg = MRI->createVirtualRegister(TRI->getBoolRC());
        MRI->setRegAllocationHint(CarryOutReg, 0, TRI->getVCC());

      Add.add(*Src0).add(*Src1).setMIFlags(Def->getFlags());

      Def->eraseFromParent();
      MI.eraseFromParent();

    assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);

    Def->eraseFromParent();
    MI.eraseFromParent();
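// foldCopyToVGPROfScalarAddOfFrameIndex, as excerpted above: when a VGPR is
// copied from an SGPR defined by a single-use scalar add (typically of a
// frame index), the scalar op is rewritten to the VALU opcode chosen by
// convertToVALUOp(); if that opcode defines a carry, a fresh condition
// register hinted toward VCC receives it, and both the scalar def and the
// copy are erased.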
  return new SIFoldOperandsLegacy();
bool SIFoldOperandsImpl::canUseImmWithOpSel(const MachineInstr *MI,
                                            int64_t ImmVal) const {
  const uint64_t TSFlags = MI->getDesc().TSFlags;

  int OpNo = MI->getOperandNo(&Old);
  unsigned Opcode = MI->getOpcode();
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;
bool SIFoldOperandsImpl::tryFoldImmWithOpSel(MachineInstr *MI, unsigned UseOpNo,
                                             int64_t ImmVal) const {
  MachineOperand &Old = MI->getOperand(UseOpNo);
  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

  AMDGPU::OpName ModName = AMDGPU::OpName::NUM_OPERAND_NAMES;
  unsigned SrcIdx = ~0;
  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
    ModName = AMDGPU::OpName::src0_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
    ModName = AMDGPU::OpName::src1_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
    ModName = AMDGPU::OpName::src2_modifiers;
  assert(ModName != AMDGPU::OpName::NUM_OPERAND_NAMES);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModName);
  MachineOperand &Mod = MI->getOperand(ModIdx);
  unsigned ModVal = Mod.getImm();

  uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo;

  auto tryFoldToInline = [&](uint32_t Imm) -> bool {
    uint16_t Lo = static_cast<uint16_t>(Imm);
    uint16_t Hi = static_cast<uint16_t>(Imm >> 16);
      Mod.setImm(NewModVal);
    if (static_cast<int16_t>(Lo) < 0) {
      int32_t SExt = static_cast<int16_t>(Lo);
        Mod.setImm(NewModVal);
    uint32_t Swapped = (static_cast<uint32_t>(Lo) << 16) | Hi;

  if (tryFoldToInline(Imm))

  bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
  bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
  if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp);
    bool Clamp = MI->getOperand(ClampIdx).getImm() != 0;
      uint16_t NegLo = -static_cast<uint16_t>(Imm);
      uint16_t NegHi = -static_cast<uint16_t>(Imm >> 16);
      uint32_t NegImm = (static_cast<uint32_t>(NegHi) << 16) | NegLo;
      if (tryFoldToInline(NegImm)) {
            IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
        MI->setDesc(TII->get(NegOpcode));
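// Illustrative, self-contained sketch (not part of SIFoldOperands.cpp): it
// reproduces with plain integers the packed-immediate arithmetic used by
// tryFoldImmWithOpSel above -- splitting a 32-bit packed value into 16-bit
// halves, forming the opsel-swapped value, and negating both halves as done
// for the V_PK_ADD_U16 <-> V_PK_SUB_U16 rewrite. The helper names are
// invented for the example.
#include <cassert>
#include <cstdint>

static uint32_t packHalves(uint16_t Lo, uint16_t Hi) {
  return (static_cast<uint32_t>(Hi) << 16) | Lo;
}

static uint32_t swapHalves(uint32_t Imm) {
  uint16_t Lo = static_cast<uint16_t>(Imm);
  uint16_t Hi = static_cast<uint16_t>(Imm >> 16);
  return (static_cast<uint32_t>(Lo) << 16) | Hi;
}

static uint32_t negateHalves(uint32_t Imm) {
  uint16_t NegLo = -static_cast<uint16_t>(Imm);
  uint16_t NegHi = -static_cast<uint16_t>(Imm >> 16);
  return (static_cast<uint32_t>(NegHi) << 16) | NegLo;
}

int main() {
  uint32_t Imm = packHalves(0x0001, 0x0002); // lo half = 1, hi half = 2
  assert(swapHalves(Imm) == packHalves(0x0002, 0x0001));
  // Adding packed {1, 2} is equivalent to subtracting packed {-1, -2}.
  assert(negateHalves(Imm) == packHalves(0xffff, 0xfffe));
  return 0;
}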
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);

  std::optional<int64_t> ImmVal;
    ImmVal = Fold.Def.getEffectiveImmVal();

  if (ImmVal && canUseImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal)) {
    if (tryFoldImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal))

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
    MachineBasicBlock *MBB = MI->getParent();

      int Op32 = Fold.ShrinkOpcode;
      MachineOperand &Dst0 = MI->getOperand(0);
      MachineOperand &Dst1 = MI->getOperand(1);

      bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());

      const TargetRegisterClass *Dst0RC = MRI->getRegClass(Dst0.getReg());
      Register NewReg0 = MRI->createVirtualRegister(Dst0RC);

      MachineInstr *Inst32 = TII->buildShrunkInst(*MI, Op32);

      if (HaveNonDbgCarryUse) {

      for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
        MI->removeOperand(I);
      MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));

        TII->commuteInstruction(*Inst32, false);

    assert(!Fold.needsShrink() && "not handled");

    if (NewMFMAOpc == -1)
    MI->setDesc(TII->get(NewMFMAOpc));
    MI->untieRegOperand(0);
    const MCInstrDesc &MCID = MI->getDesc();
    for (unsigned I = 0; I < MI->getNumDefs(); ++I)
        MI->getOperand(I).setIsEarlyClobber(true);

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.Def.OpToFold->getGlobal(),
                   Fold.Def.OpToFold->getOffset(),
                   Fold.Def.OpToFold->getTargetFlags());

  MachineOperand *New = Fold.Def.OpToFold;

  if (const TargetRegisterClass *OpRC =
          TII->getRegClass(MI->getDesc(), Fold.UseOpNo)) {
    const TargetRegisterClass *NewRC =
        TRI->getRegClassForReg(*MRI, New->getReg());

    const TargetRegisterClass *ConstrainRC = OpRC;
    if (New->getSubReg()) {
          TRI->getMatchingSuperRegClass(NewRC, OpRC, New->getSubReg());

    if (New->getReg().isVirtual() &&
        !MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
                        << TRI->getRegClassName(ConstrainRC) << '\n');

  if (New->getReg().isPhysical()) {
                                FoldCandidate &&Entry) {
  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
  LLVM_DEBUG(dbgs() << "Append " << (Entry.Commuted ? "commuted" : "normal")
                    << " operand " << Entry.UseOpNo << "\n " << *Entry.UseMI);

                              const FoldableDef &FoldOp,
                              bool Commuted = false, int ShrinkOp = -1) {
      FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
  if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput())

    const FoldableDef &OpToFold) {
  assert(OpToFold.isImm() && "Expected immediate operand");
  uint64_t ImmVal = OpToFold.getEffectiveImmVal().value();
bool SIFoldOperandsImpl::tryAddToFoldList(
    SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
    const FoldableDef &OpToFold) const {
  const unsigned Opc = MI->getOpcode();

  auto tryToFoldAsFMAAKorMK = [&]() {
    if (!OpToFold.isImm())

    const bool TryAK = OpNo == 3;
    const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
    MI->setDesc(TII->get(NewOpc));

    bool FoldAsFMAAKorMK =
        tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
    if (FoldAsFMAAKorMK) {
        MI->untieRegOperand(3);

        MachineOperand &Op1 = MI->getOperand(1);
        MachineOperand &Op2 = MI->getOperand(2);

  bool IsLegal = OpToFold.isOperandLegal(*TII, *MI, OpNo);
  if (!IsLegal && OpToFold.isImm()) {
    if (std::optional<int64_t> ImmVal = OpToFold.getEffectiveImmVal())
      IsLegal = canUseImmWithOpSel(MI, OpNo, *ImmVal);

    if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
      MI->setDesc(TII->get(NewOpc));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
        MI->untieRegOperand(OpNo);
      MI->removeOperand(MI->getNumExplicitOperands() - 1);

    if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
      if (tryToFoldAsFMAAKorMK())

    if (OpToFold.isImm()) {
      if (Opc == AMDGPU::S_SETREG_B32)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
      else if (Opc == AMDGPU::S_SETREG_B32_mode)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
        MI->setDesc(TII->get(ImmOpc));

    bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);

      MachineOperand &Op = MI->getOperand(OpNo);
      MachineOperand &CommutedOp = MI->getOperand(CommuteOpNo);

      if (!Op.isReg() || !CommutedOp.isReg())

      if (Op.isReg() && CommutedOp.isReg() &&
          (Op.getReg() == CommutedOp.getReg() &&

    if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))

    if (!OpToFold.isOperandLegal(*TII, *MI, CommuteOpNo)) {
      if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
           Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
          (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
        TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);

      MachineOperand &OtherOp = MI->getOperand(OpNo);
      if (!OtherOp.isReg() ||

    unsigned MaybeCommutedOpc = MI->getOpcode();

  if (Opc == AMDGPU::S_FMAC_F32 &&
      (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
    if (tryToFoldAsFMAAKorMK())

  if (OpToFold.isImm() &&
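// tryAddToFoldList, as excerpted above, tries several strategies before
// giving up on an operand: use it directly where isOperandLegal() or
// canUseImmWithOpSel() accepts it, rewrite MAC/FMAC to the MAD/FMA form,
// rewrite S_FMAC_F32 to S_FMAAK_F32/S_FMAMK_F32, switch S_SETREG_B32 to its
// immediate variant, or commute the instruction (undoing the commute when the
// operand is still illegal) so the value can sit in the other source slot.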
bool SIFoldOperandsImpl::isUseSafeToFold(const MachineInstr &MI,
                                         const MachineOperand &UseMO) const {
  return !TII->isSDWA(MI);

       SubDef && TII.isFoldableCopy(*SubDef);
       SubDef = MRI.getVRegDef(Sub->getReg())) {
    unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);

    if (SrcOp.getSubReg())
const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
    MachineInstr &RegSeq,
    SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs) const {

  const TargetRegisterClass *RC = nullptr;

    else if (!TRI->getCommonSubClass(RC, OpRC))

      Defs.emplace_back(&SrcOp, SubRegIdx);

    if (DefSrc && (DefSrc->isReg() || DefSrc->isImm())) {
      Defs.emplace_back(DefSrc, SubRegIdx);

    Defs.emplace_back(&SrcOp, SubRegIdx);

const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
    SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
  if (!Def || !Def->isRegSequence())

  return getRegSeqInit(*Def, Defs);
std::pair<int64_t, const TargetRegisterClass *>
SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const {
  const TargetRegisterClass *SrcRC = getRegSeqInit(RegSeq, Defs);

  bool TryToMatchSplat64 = false;

  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
    const MachineOperand *Op = Defs[I].first;

    int64_t SubImm = Op->getImm();
      if (Imm != SubImm) {
        if (I == 1 && (E & 1) == 0) {
          TryToMatchSplat64 = true;

  if (!TryToMatchSplat64)
    return {Defs[0].first->getImm(), SrcRC};

  for (unsigned I = 0, E = Defs.size(); I != E; I += 2) {
    const MachineOperand *Op0 = Defs[I].first;
    const MachineOperand *Op1 = Defs[I + 1].first;

    unsigned SubReg0 = Defs[I].second;
    unsigned SubReg1 = Defs[I + 1].second;

    if (TRI->getChannelFromSubReg(SubReg0) + 1 !=
        TRI->getChannelFromSubReg(SubReg1))

      SplatVal64 = MergedVal;
    else if (SplatVal64 != MergedVal)

  const TargetRegisterClass *RC64 = TRI->getSubRegisterClass(

  return {SplatVal64, RC64};
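// Illustrative, self-contained sketch (not part of SIFoldOperands.cpp): the
// same 64-bit splat check as isRegSeqSplat above, applied to a flat array of
// 32-bit "channel" values. Adjacent channels are merged into a 64-bit value
// (LLVM's Make_64 performs the same high/low merge) and every pair must
// agree. Names are invented for the example.
#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

static uint64_t make64(uint32_t Hi, uint32_t Lo) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

static std::optional<uint64_t> findSplat64(const std::vector<uint32_t> &Chans) {
  if (Chans.empty() || (Chans.size() & 1))
    return std::nullopt;
  std::optional<uint64_t> Splat;
  for (size_t I = 0; I != Chans.size(); I += 2) {
    // Chans[I] is the low channel (sub0-like), Chans[I + 1] the high one.
    uint64_t Merged = make64(Chans[I + 1], Chans[I]);
    if (!Splat)
      Splat = Merged;
    else if (*Splat != Merged)
      return std::nullopt;
  }
  return Splat;
}

int main() {
  assert(findSplat64({0x1, 0x2, 0x1, 0x2}) == make64(0x2, 0x1));
  assert(!findSplat64({0x1, 0x2, 0x3, 0x2}).has_value());
  return 0;
}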
bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
    MachineInstr *UseMI, unsigned UseOpIdx, int64_t SplatVal,
    const TargetRegisterClass *SplatRC) const {
  if (UseOpIdx >= Desc.getNumOperands())

  int16_t RCID = TII->getOpRegClassID(Desc.operands()[UseOpIdx]);

  const TargetRegisterClass *OpRC = TRI->getRegClass(RCID);

  if (SplatVal != 0 && SplatVal != -1) {
    uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);

    if (!TRI->getCommonSubClass(OpRC, SplatRC))

  if (!TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp))
bool SIFoldOperandsImpl::tryToFoldACImm(
    const FoldableDef &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
    SmallVectorImpl<FoldCandidate> &FoldList) const {
  if (UseOpIdx >= Desc.getNumOperands())

  if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
void SIFoldOperandsImpl::foldOperand(
    FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
    SmallVectorImpl<FoldCandidate> &FoldList,
    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {

  if (!isUseSafeToFold(*UseMI, *UseOp))

  if (UseOp->isReg() && OpToFold.isReg()) {
    if (UseOp->getSubReg() != AMDGPU::NoSubRegister &&
         !TRI->isSGPRReg(*MRI, OpToFold.getReg())))

    const TargetRegisterClass *SplatRC;
    std::tie(SplatVal, SplatRC) = isRegSeqSplat(*UseMI);

    for (unsigned I = 0; I != UsesToProcess.size(); ++I) {
      MachineOperand *RSUse = UsesToProcess[I];
      MachineInstr *RSUseMI = RSUse->getParent();

        if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
          FoldableDef SplatDef(SplatVal, SplatRC);

      if (RSUse->getSubReg() != RegSeqDstSubReg)

      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,

  if (tryToFoldACImm(OpToFold, UseMI, UseOpIdx, FoldList))

  if (frameIndexMayFold(*UseMI, UseOpIdx, OpToFold)) {
      if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=

      MachineOperand &SOff =
          *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);

          TII->getNamedOperand(*UseMI, AMDGPU::OpName::cpol)->getImm();

  bool FoldingImmLike =
      OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();

    const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);

    const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
          AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
          AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
      const MCInstrDesc &MovDesc = TII->get(MovOp);
      const TargetRegisterClass *MovDstRC =

      const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
      int16_t RegClassID = TII->getOpRegClassID(MovDesc.operands()[SrcIdx]);
      if (RegClassID != -1) {
        const TargetRegisterClass *MovSrcRC = TRI->getRegClass(RegClassID);
          MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);

        if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
            (!OpToFold.isImm() ||
             !TII->isImmOperandLegal(MovDesc, SrcIdx,
                                     *OpToFold.getEffectiveImmVal())))

        if (!MRI->constrainRegClass(SrcReg, MovSrcRC))

        if (!OpToFold.isImm() ||
            !TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))

    while (ImpOpI != ImpOpE) {

    if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
      MachineOperand NewSrcOp(SrcOp);

    LLVM_DEBUG(dbgs() << "Folding " << OpToFold.OpToFold << "\n into "

    unsigned SubRegIdx = OpToFold.getSubReg();

    static_assert(AMDGPU::sub1_hi16 == 12, "Subregister layout has changed");

    if (SubRegIdx > AMDGPU::sub1) {
      LaneBitmask M = TRI->getSubRegIndexLaneMask(SubRegIdx);
      M |= M.getLane(M.getHighestLane() - 1);
      SmallVector<unsigned, 4> Indexes;
      TRI->getCoveringSubRegIndexes(TRI->getRegClassForReg(*MRI, UseReg), M,
      assert(Indexes.size() == 1 && "Expected one 32-bit subreg to cover");
      SubRegIdx = Indexes[0];
    } else if (TII->getOpSize(*UseMI, 1) == 4)
      SubRegIdx = AMDGPU::sub0;

    OpToFold.OpToFold->setIsKill(false);

    if (foldCopyToAGPRRegSequence(UseMI))

  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
      (UseOpc == AMDGPU::V_READLANE_B32 &&
           AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {

    if (FoldingImmLike) {
              *OpToFold.DefMI, *UseMI))

      if (OpToFold.isImm()) {
            *OpToFold.getEffectiveImmVal());
      } else if (OpToFold.isFI())
        assert(OpToFold.isGlobal());
                       OpToFold.OpToFold->getOffset(),
                       OpToFold.OpToFold->getTargetFlags());

    if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
              *OpToFold.DefMI, *UseMI))

      UseDesc.operands()[UseOpIdx].RegClass == -1)

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:
  case AMDGPU::S_XNOR_B32:
  case AMDGPU::S_NAND_B32:
  case AMDGPU::S_NOR_B32:
  case AMDGPU::S_ANDN2_B32:
  case AMDGPU::S_ORN2_B32:
  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    Result = LHS << (RHS & 31);
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);
  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);
  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);
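// Illustrative, self-contained sketch (not part of SIFoldOperands.cpp): the
// shift folding above in plain integers -- shift amounts are masked to five
// bits, and the arithmetic right shift is obtained by casting the value to
// int32_t before shifting. The enum and function name are invented for the
// example.
#include <cassert>
#include <cstdint>

enum class ShiftKind { Lshl, Lshr, Ashr };

static uint32_t foldShift(ShiftKind Kind, uint32_t LHS, uint32_t RHS) {
  switch (Kind) {
  case ShiftKind::Lshl:
    return LHS << (RHS & 31);
  case ShiftKind::Lshr:
    return LHS >> (RHS & 31);
  case ShiftKind::Ashr:
    return static_cast<uint32_t>(static_cast<int32_t>(LHS) >> (RHS & 31));
  }
  return 0;
}

int main() {
  assert(foldShift(ShiftKind::Lshl, 1, 33) == 2); // 33 & 31 == 1
  assert(foldShift(ShiftKind::Lshr, 0x80000000u, 31) == 1);
  assert(foldShift(ShiftKind::Ashr, 0x80000000u, 31) == 0xffffffffu);
  return 0;
}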
  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
std::optional<int64_t>
SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
  if (!Op.isReg() || !Op.getReg().isVirtual())
    return std::nullopt;

  const MachineInstr *Def = MRI->getVRegDef(Op.getReg());
  if (Def && Def->isMoveImmediate()) {
    const MachineOperand &ImmSrc = Def->getOperand(1);
      return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());

  return std::nullopt;
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
  if (!MI->allImplicitDefsAreDead())

  unsigned Opc = MI->getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  MachineOperand *Src0 = &MI->getOperand(Src0Idx);
  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);

  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
       Opc == AMDGPU::S_NOT_B32) &&
    MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
    TII->mutateAndCleanupImplicit(

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  MachineOperand *Src1 = &MI->getOperand(Src1Idx);
  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);

  if (!Src0Imm && !Src1Imm)

  if (Src0Imm && Src1Imm) {
    bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());

    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->removeOperand(Src1Idx);

  if (!MI->isCommutable())

  if (Src0Imm && !Src1Imm) {

  int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(

  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
      Opc == AMDGPU::S_AND_B32) {
      MI->removeOperand(Src0Idx);
      TII->mutateAndCleanupImplicit(
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));

  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
      Opc == AMDGPU::S_XOR_B32) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));
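// The branches above apply the usual algebraic identities once one source is
// a known constant: NOT of an immediate folds to the complemented value,
// OR with 0 / AND with -1 / XOR with 0 appear to degrade to a plain COPY of
// the other operand, the remaining OR/AND branches to a move of the constant
// (the getMovOpc() path), and two constant sources are folded outright via
// the binary evaluation excerpted earlier.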
bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
      Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)

  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
  if (!Src0Imm || *Src0Imm != *Src1Imm)

      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
      (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))

  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    MI.removeOperand(Src2Idx);
  MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
  if (Src1ModIdx != -1)
    MI.removeOperand(Src1ModIdx);
  if (Src0ModIdx != -1)
    MI.removeOperand(Src0ModIdx);
  TII->mutateAndCleanupImplicit(MI, NewDesc);
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
  if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())

  MachineInstr *SrcDef = MRI->getVRegDef(Src1);

  MRI->replaceRegWith(Dst, Src1);
  if (!MI.getOperand(2).isKill())
    MRI->clearKillFlags(Src1);
  MI.eraseFromParent();
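// tryFoldZeroHighBits: a V_AND_B32 with mask 0xffff is dropped when the
// instruction defining the other operand already leaves the high 16 bits
// zero (that check on SrcDef is elided from this excerpt); the masked result
// is then rewired directly to the unmasked source register.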
bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
                                         const FoldableDef &OpToFold) const {
  SmallVector<MachineInstr *, 4> CopiesToReplace;

  MachineOperand &Dst = MI.getOperand(0);
  if (OpToFold.isImm()) {
        if (tryConstantFoldOp(&UseMI)) {

  for (auto *U : UsesToProcess) {
    MachineInstr *UseMI = U->getParent();
    FoldableDef SubOpToFold = OpToFold.getWithSubReg(*TRI, U->getSubReg());

  if (CopiesToReplace.empty() && FoldList.empty())

  MachineFunction *MF = MI.getMF();
  for (MachineInstr *Copy : CopiesToReplace)
    Copy->addImplicitDefUseOperands(*MF);

  SetVector<MachineInstr *> ConstantFoldCandidates;
  for (FoldCandidate &Fold : FoldList) {
    assert(!Fold.isReg() || Fold.Def.OpToFold);
    if (Fold.isReg() && Fold.getReg().isVirtual()) {
      const MachineInstr *DefMI = Fold.Def.DefMI;

      assert(Fold.Def.OpToFold && Fold.isReg());
      MRI->clearKillFlags(Fold.getReg());
                        << static_cast<int>(Fold.UseOpNo) << " of "
        ConstantFoldCandidates.insert(Fold.UseMI);

    } else if (Fold.Commuted) {
      TII->commuteInstruction(*Fold.UseMI, false);

  for (MachineInstr *MI : ConstantFoldCandidates) {
    if (tryConstantFoldOp(MI)) {
bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
  const TargetRegisterClass *DefRC =
  if (!TRI->isAGPRClass(DefRC))

  MachineInstr *RegSeq = MRI->getVRegDef(UseReg);

  DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;

  const TargetRegisterClass *UseRC =

  unsigned NumFoldable = 0;

  for (unsigned I = 1; I != NumRegSeqOperands; I += 2) {
    const TargetRegisterClass *DestSuperRC = TRI->getMatchingSuperRegClass(
        DefRC, &AMDGPU::AGPR_32RegClass, SubRegIdx);

    const TargetRegisterClass *InputRC =

    const TargetRegisterClass *MatchRC =
        TRI->getMatchingSuperRegClass(DefRC, InputRC, SubRegIdx);

  if (NumFoldable == 0)

  CopyMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));

  for (auto [Def, DestSubIdx] : NewDefs) {
    if (!Def->isReg()) {
      Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
      BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)

      Def->setIsKill(false);

      Register &VGPRCopy = VGPRCopies[Src];

        const TargetRegisterClass *VGPRUseSubRC =
            TRI->getSubRegisterClass(UseRC, DestSubIdx);

        const TargetRegisterClass *SubRC =
            TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg);

        VGPRCopy = MRI->createVirtualRegister(VGPRUseSubRC);

    B.addImm(DestSubIdx);
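// foldCopyToAGPRRegSequence rewrites a COPY whose AGPR destination is built
// from a REG_SEQUENCE: the copy itself becomes a REG_SEQUENCE of AGPR_32
// pieces, foldable immediates are materialized with V_ACCVGPR_WRITE_B32_e64
// into fresh AGPR_32 temporaries, and repeated VGPR inputs are routed through
// the VGPRCopies cache so each source is copied at most once.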
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
    MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
  if (DstReg == AMDGPU::M0) {
    MachineOperand &NewM0Val = MI.getOperand(1);
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();

  MachineOperand *OpToFoldPtr;
  if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
    if (TII->hasAnyModifiersSet(MI))
    OpToFoldPtr = &MI.getOperand(2);
    OpToFoldPtr = &MI.getOperand(1);
  MachineOperand &OpToFold = *OpToFoldPtr;

  if (!FoldingImm && !OpToFold.isReg())

      !TRI->isConstantPhysReg(OpToFold.getReg()))

  const TargetRegisterClass *DstRC =
      MRI->getRegClass(MI.getOperand(0).getReg());

  if (MI.getOpcode() == AMDGPU::COPY && OpToFold.isReg() &&
    if (DstRC == &AMDGPU::SReg_32RegClass &&
        DstRC == MRI->getRegClass(OpToFold.getReg())) {

  if (OpToFold.isReg() && MI.isCopy() && !MI.getOperand(1).getSubReg()) {
    if (foldCopyToAGPRRegSequence(&MI))

  FoldableDef Def(OpToFold, DstRC);
  bool Changed = foldInstOperand(MI, Def);

  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);
    InstToErase->eraseFromParent();
      InstToErase = nullptr;
    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))

  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();

  return OpToFold.isReg() &&
         foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.getReg(), MI);
const MachineOperand *
SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_t16_e64:
  case AMDGPU::V_MAX_F16_fake16_e64:
  case AMDGPU::V_MAX_F64_e64:
  case AMDGPU::V_MAX_NUM_F64_e64:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
  case AMDGPU::V_PK_MAX_NUM_BF16: {
    if (MI.mayRaiseFPException())

    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())

    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        Src0->getSubReg() != AMDGPU::NoSubRegister)

    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

        = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
        = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    unsigned UnsetMods =
        (Op == AMDGPU::V_PK_MAX_F16 || Op == AMDGPU::V_PK_MAX_NUM_BF16)
    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
  const MachineOperand *ClampSrc = isClamp(MI);
  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))

  if (TII->getClampMask(*Def) != TII->getClampMask(MI))

  if (Def->mayRaiseFPException())

  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);

  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);

  Register MIDstReg = MI.getOperand(0).getReg();
  if (TRI->isSGPRReg(*MRI, DefReg)) {

  MRI->replaceRegWith(MIDstReg, DefReg);

  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64: {
    case 0x3fe0000000000000:
    case 0x4000000000000000:
    case 0x4010000000000000:
  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64: {
    switch (static_cast<uint16_t>(Val)) {
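// Illustrative, self-contained sketch (not part of SIFoldOperands.cpp): a
// compile-time check (C++20 std::bit_cast) that the f64 literals matched
// above really are the output-modifier multipliers 0.5, 2.0 and 4.0.
#include <bit>
#include <cstdint>

static_assert(std::bit_cast<uint64_t>(0.5) == 0x3fe0000000000000ull);
static_assert(std::bit_cast<uint64_t>(2.0) == 0x4000000000000000ull);
static_assert(std::bit_cast<uint64_t>(4.0) == 0x4010000000000000ull);

int main() { return 0; }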
std::pair<const MachineOperand *, int>
SIFoldOperandsImpl::isOMod(const MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64:
  case AMDGPU::V_MUL_F16_e64: {
    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
          Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
          Op == AMDGPU::V_MUL_F16_fake16_e64) &&
        MI.mayRaiseFPException())

    const MachineOperand *RegOp = nullptr;
    const MachineOperand *ImmOp = nullptr;
    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    if (Src0->isImm()) {
    } else if (Src1->isImm()) {

        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))

    return std::pair(RegOp, OMod);

  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F64_pseudo_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64:
  case AMDGPU::V_ADD_F16_fake16_e64: {
    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
          Op == AMDGPU::V_ADD_F16_e64 || Op == AMDGPU::V_ADD_F16_t16_e64 ||
          Op == AMDGPU::V_ADD_F16_fake16_e64) &&

    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
  const MachineOperand *RegOp;
  std::tie(RegOp, OMod) = isOMod(MI);
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !MRI->hasOneNonDBGUser(RegOp->getReg()))

  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);

  if (Def->mayRaiseFPException())

  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());

  MRI->clearKillFlags(Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {
  auto Reg = MI.getOperand(0).getReg();
      !MRI->hasOneNonDBGUse(Reg))

  if (!getRegSeqInit(Defs, Reg))

  for (auto &[Op, SubIdx] : Defs) {
    if (TRI->isAGPR(*MRI, Op->getReg()))
    const MachineInstr *SubDef = MRI->getVRegDef(Op->getReg());

  MachineOperand *Op = &*MRI->use_nodbg_begin(Reg);
  MachineInstr *UseMI = Op->getParent();

  if (Op->getSubReg())

  const TargetRegisterClass *OpRC = TII->getRegClass(InstDesc, OpIdx);
  if (!OpRC || !TRI->isVectorSuperClass(OpRC))

  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
  auto Dst = MRI->createVirtualRegister(NewDstRC);
                 TII->get(AMDGPU::REG_SEQUENCE), Dst);

  for (auto &[Def, SubIdx] : Defs) {
    Def->setIsKill(false);

    MachineInstr *SubDef = MRI->getVRegDef(Def->getReg());

  RS->eraseFromParent();

  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
    MI.eraseFromParent();
                       Register &OutReg, unsigned &OutSubReg) {
  if (TRI.isAGPR(MRI, CopySrcReg)) {
    OutReg = CopySrcReg;

  if (!CopySrcDef || !CopySrcDef->isCopy())

      OtherCopySrc.getSubReg() != AMDGPU::NoSubRegister ||
      !TRI.isAGPR(MRI, OtherCopySrcReg))

  OutReg = OtherCopySrcReg;
bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &PHI) {
  if (!TRI->isVGPR(*MRI, PhiOut))

  const TargetRegisterClass *ARC = nullptr;
  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    MachineOperand &MO = PHI.getOperand(K);
    if (!Copy || !Copy->isCopy())

    unsigned AGPRRegMask = AMDGPU::NoSubRegister;
    const TargetRegisterClass *CopyInRC = MRI->getRegClass(AGPRSrc);
    if (const auto *SubRC = TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))

  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    MachineOperand &MO = PHI.getOperand(K);

    MachineBasicBlock *InsertMBB = nullptr;

    unsigned CopyOpc = AMDGPU::COPY;
    if (MachineInstr *Def = MRI->getVRegDef(Reg)) {
      if (Def->isCopy()) {
        unsigned AGPRSubReg = AMDGPU::NoSubRegister;

        MachineOperand &CopyIn = Def->getOperand(1);
          CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      InsertMBB = Def->getParent();

    Register NewReg = MRI->createVirtualRegister(ARC);
    MachineInstr *MI = BuildMI(*InsertMBB, InsertPt, PHI.getDebugLoc(),
                               TII->get(CopyOpc), NewReg)

  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(0).setReg(NewReg);

          TII->get(AMDGPU::COPY), PhiOut)
bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &MI) {
  MachineOperand &Def = MI.getOperand(0);

  while (!Users.empty()) {
    const MachineInstr *I = Users.pop_back_val();
    if (!I->isCopy() && !I->isRegSequence())

    Register DstReg = I->getOperand(0).getReg();
    if (TRI->isAGPR(*MRI, DstReg))
    for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg))
      Users.push_back(&U);

  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
  if (!TII->isOperandLegal(MI, 0, &Def)) {
    MRI->setRegClass(DefReg, RC);

  while (!MoveRegs.empty()) {
    MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
  DenseMap<std::pair<Register, unsigned>, std::vector<MachineOperand *>>

  for (auto &MI : MBB) {
    if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))

    for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
      MachineOperand &PhiMO = MI.getOperand(K);

  for (const auto &[Entry, MOs] : RegToMO) {
    if (MOs.size() == 1)

    MachineInstr *Def = MRI->getVRegDef(Reg);
    MachineBasicBlock *DefMBB = Def->getParent();

        MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
    MachineInstr *VGPRCopy =
            TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)

    Register TempAGPR = MRI->createVirtualRegister(ARC);
            TII->get(AMDGPU::COPY), TempAGPR)

    for (MachineOperand *MO : MOs) {
bool SIFoldOperandsImpl::run(MachineFunction &MF) {
  MFI = MF.getInfo<SIMachineFunctionInfo>();

    MachineOperand *CurrentKnownM0Val = nullptr;
      if (tryFoldZeroHighBits(MI)) {
      if (MI.isRegSequence() && tryFoldRegSequence(MI)) {
      if (MI.isPHI() && tryFoldPhiAGPR(MI)) {
      if (MI.mayLoad() && tryFoldLoad(MI)) {
      if (TII->isFoldableCopy(MI)) {
        Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);

      if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
        CurrentKnownM0Val = nullptr;

  bool Changed = SIFoldOperandsImpl().run(MF);
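// run() is the per-function driver: it walks each instruction once,
// dispatching to the folds excerpted above (zero-high-bits AND, REG_SEQUENCE
// of VGPRs feeding AGPR users, AGPR PHIs, loads that can move to AGPRs, and
// foldable copies), and it drops the cached M0 value whenever M0 is
// redefined. The trailing fragment is the pass wrapper invoking
// SIFoldOperandsImpl().run(MF).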