#define DEBUG_TYPE "si-fold-operands"
  bool Commuted_ = false,
        UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
    if (FoldOp->isImm()) {
      ImmToFold = FoldOp->getImm();
    } else if (FoldOp->isFI()) {
      FrameIndexToFold = FoldOp->getIndex();

  bool needsShrink() const { return ShrinkOpcode != -1; }
class SIFoldOperandsImpl {
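  // Map a scalar ALU opcode to its VALU equivalent, choosing the VOP3 or VOP2
  // encoding depending on UseVOP3; returns INSTRUCTION_LIST_END when there is
  // no mapping.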
  unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
    case AMDGPU::S_ADD_I32: {
      if (ST->hasAddNoCarry())
        return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
      return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
    case AMDGPU::S_OR_B32:
      return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
    case AMDGPU::S_AND_B32:
      return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
    case AMDGPU::S_MUL_I32:
      return AMDGPU::V_MUL_LO_U32_e64;
    return AMDGPU::INSTRUCTION_LIST_END;
  bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,

  bool canUseImmWithOpSel(FoldCandidate &Fold) const;

  bool tryFoldImmWithOpSel(FoldCandidate &Fold) const;

  getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

  SIFoldOperandsImpl() = default;

    return SIFoldOperandsImpl().run(MF);
char SIFoldOperandsLegacy::ID = 0;

      TRI.getSubRegisterClass(RC, MO.getSubReg()))
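// macToMad: map a MAC/FMAC opcode to its untied MAD/FMA form.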
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  return AMDGPU::INSTRUCTION_LIST_END;
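// frameIndexMayFold: returns true if a frame-index operand can be folded
// directly into this use, either as an operand of the listed scalar/vector
// add/or/and opcodes (when the other source is an immediate) or, per the
// final VIdx/SIdx check, as an address operand.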
bool SIFoldOperandsImpl::frameIndexMayFold(
  if (!OpToFold.isFI())

  const unsigned Opc = UseMI.getOpcode();
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::S_AND_B32:
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:
    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&

  return OpNo == VIdx && SIdx == -1;
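// foldCopyToVGPROfScalarAddOfFrameIndex: when a COPY moves into a VGPR the
// single-use result of a scalar add/or/and of a frame index, rewrite the
// scalar instruction to its VALU form (via convertToVALUOp) and drop the copy.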
bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
  if (TRI->isVGPR(*MRI, DstReg) && TRI->isSGPRReg(*MRI, SrcReg) &&
      MRI->hasOneNonDBGUse(SrcReg)) {
    if (!Def || Def->getNumOperands() != 4)

    const bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
    unsigned NewOp = convertToVALUOp(Def->getOpcode(), UseVOP3);
    if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
        !Def->getOperand(3).isDead())

    if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
      if (Add->getDesc().getNumDefs() == 2) {
        Register CarryOutReg = MRI->createVirtualRegister(TRI->getBoolRC());
        MRI->setRegAllocationHint(CarryOutReg, 0, TRI->getVCC());

      Add.add(*Src0).add(*Src1).setMIFlags(Def->getFlags());

      Def->eraseFromParent();
      MI.eraseFromParent();

    assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);

    Def->eraseFromParent();
    MI.eraseFromParent();

  return new SIFoldOperandsLegacy();
bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
  const uint64_t TSFlags = MI->getDesc().TSFlags;

  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;
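// tryFoldImmWithOpSel: fold an immediate into a packed (op_sel) operand by
// adjusting the op_sel modifier bits, sign-extending the low half when needed,
// and, for V_PK_ADD_U16/V_PK_SUB_U16, trying the negated immediate with the
// opposite opcode when the original value is not inlinable.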
bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

  unsigned SrcIdx = ~0;
    ModIdx = AMDGPU::OpName::src0_modifiers;
    ModIdx = AMDGPU::OpName::src1_modifiers;
    ModIdx = AMDGPU::OpName::src2_modifiers;
  unsigned ModVal = Mod.getImm();

      Mod.setImm(NewModVal);
      if (static_cast<int16_t>(Lo) < 0) {
        int32_t SExt = static_cast<int16_t>(Lo);
          Mod.setImm(NewModVal);

  if (tryFoldToInline(Imm))

  bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
  bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
  if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
    bool Clamp = MI->getOperand(ClampIdx).getImm() != 0;

    if (tryFoldToInline(NegImm)) {
          IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
      MI->setDesc(TII->get(NegOpcode));
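// updateOperand: perform the recorded fold, using the op_sel path for
// immediates when possible, shrinking an e64 instruction to its e32 form when
// the candidate requires it, and rewriting the old operand to the folded
// immediate, frame index, global address, or register.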
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
  if (Fold.isImm() && canUseImmWithOpSel(Fold)) {
    if (tryFoldImmWithOpSel(Fold))

  int OpNo = MI->getOperandNo(&Old);
  if (!TII->isOperandLegal(*MI, OpNo, &New))

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
    int Op32 = Fold.ShrinkOpcode;

    bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());

    Register NewReg0 = MRI->createVirtualRegister(Dst0RC);

    if (HaveNonDbgCarryUse) {

    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
      MI->removeOperand(I);
    MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));

      TII->commuteInstruction(*Inst32, false);

  assert(!Fold.needsShrink() && "not handled");

    if (NewMFMAOpc == -1)
    MI->setDesc(TII->get(NewMFMAOpc));
    MI->untieRegOperand(0);

  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
                   Fold.OpToFold->getTargetFlags());

  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });

  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)

  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
                    << " operand " << OpNo << "\n " << *MI);
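// tryAddToFoldList: try to record (MI, OpNo) as a fold candidate. If the
// operand is not legal in place, this may rewrite MAC/FMAC to MAD/FMA,
// S_FMAC_F32 to S_FMAAK/S_FMAMK_F32, SETREG to its IMM32 form, or commute the
// instruction, and only then append the candidate.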
bool SIFoldOperandsImpl::tryAddToFoldList(
  const unsigned Opc = MI->getOpcode();

  auto tryToFoldAsFMAAKorMK = [&]() {
    if (!OpToFold->isImm())

    const bool TryAK = OpNo == 3;
    const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
    MI->setDesc(TII->get(NewOpc));

    bool FoldAsFMAAKorMK =
        tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
    if (FoldAsFMAAKorMK) {
      MI->untieRegOperand(3);

    MI->setDesc(TII->get(Opc));

  bool IsLegal = TII->isOperandLegal(*MI, OpNo, OpToFold);
  if (!IsLegal && OpToFold->isImm()) {
    FoldCandidate Fold(MI, OpNo, OpToFold);
    IsLegal = canUseImmWithOpSel(Fold);

    if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
      MI->setDesc(TII->get(NewOpc));

      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
        MI->untieRegOperand(OpNo);

        MI->removeOperand(MI->getNumExplicitOperands() - 1);
      MI->setDesc(TII->get(Opc));

    if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
      if (tryToFoldAsFMAAKorMK())

    if (OpToFold->isImm()) {
      if (Opc == AMDGPU::S_SETREG_B32)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
      else if (Opc == AMDGPU::S_SETREG_B32_mode)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
        MI->setDesc(TII->get(ImmOpc));

    bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);

    if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())

    if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))

    if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
      if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
           Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
        TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);

      if (!OtherOp.isReg() ||

    unsigned MaybeCommutedOpc = MI->getOpcode();

  if ((Opc == AMDGPU::S_FMAAK_F32 || Opc == AMDGPU::S_FMAMK_F32) &&
      !OpToFold->isReg() && !TII->isInlineConstant(*OpToFold)) {
    unsigned ImmIdx = Opc == AMDGPU::S_FMAAK_F32 ? 3 : 2;
    if (!OpImm.isReg() &&
        TII->isInlineConstant(*MI, MI->getOperand(OpNo), OpImm))
      return tryToFoldAsFMAAKorMK();

  if (Opc == AMDGPU::S_FMAC_F32 &&
      (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
    if (tryToFoldAsFMAAKorMK())

  if (TII->isSALU(MI->getOpcode())) {
    if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {
      for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
        auto &Op = MI->getOperand(i);
        if (OpNo != i && !Op.isReg() &&

  return !TII->isSDWA(MI);
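// getRegSeqInit: walk the REG_SEQUENCE (and trivial sub-defs) that defines a
// register and record each source operand together with its subregister
// index, bailing out on physical registers.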
bool SIFoldOperandsImpl::getRegSeqInit(
  if (!Def || !Def->isRegSequence())

  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
         SubDef = MRI->getVRegDef(Sub->getReg())) {
      if (TII->isInlineConstant(*Op, OpTy))

      if (!Op->isReg() || Op->getReg().isPhysical())

    Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
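// tryToFoldACImm: fold an inline-constant immediate into an operand, either
// taken directly from OpToFold, looked through a foldable copy, or gathered
// from a REG_SEQUENCE whose elements are all suitable inline constants.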
bool SIFoldOperandsImpl::tryToFoldACImm(
  if (UseOpIdx >= Desc.getNumOperands())

  uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
      TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {

  if (!OpToFold.isReg())

  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
    if (DefOp.isImm() && TII->isInlineConstant(DefOp, OpTy) &&
        TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {

  if (!getRegSeqInit(Defs, UseReg, OpTy))

  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
    auto SubImm = Op->getImm();

    if (!TII->isInlineConstant(*Op, OpTy) ||
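// foldOperand: the per-use folding driver. It recurses through REG_SEQUENCE
// uses, folds frame indexes into scratch accesses, rewrites copies into AGPRs
// element by element (V_ACCVGPR_WRITE), handles readfirstlane/readlane of
// immediates, and otherwise queues the fold through tryAddToFoldList.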
void SIFoldOperandsImpl::foldOperand(
  if (!isUseSafeToFold(*UseMI, *UseOp))

    for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))
    for (auto *RSUse : UsesToProcess) {
      if (RSUse->getSubReg() != RegSeqDstSubReg)

      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,

  if (tryToFoldACImm(OpToFold, UseMI, UseOpIdx, FoldList))

  if (frameIndexMayFold(*UseMI, UseOpIdx, OpToFold)) {
    if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
        MFI->getScratchRSrcReg())

        *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);

  bool FoldingImmLike =

    if (DestRC == &AMDGPU::AGPR_32RegClass &&

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)

    while (ImpOpI != ImpOpE) {
    if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {

      for (unsigned I = 0; I < Size / 4; ++I) {
          int64_t Imm = Def->getImm();
          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
        } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
          Def->setIsKill(false);
          if (!SeenAGPRs.insert(Src)) {
            Def->setIsKill(false);

          if (TRI->isSGPRReg(*MRI, Src.Reg)) {
          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
          if (CopyToVGPR.Reg) {
            if (VGPRCopies.count(CopyToVGPR)) {
              Vgpr = VGPRCopies[CopyToVGPR];
              Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              VGPRCopies[CopyToVGPR] = Vgpr;
            auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                    TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);

        B.addImm(Defs[I].second);

      if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1))
      else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
      else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&

    if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
        (UseOpc == AMDGPU::V_READLANE_B32 &&

      if (FoldingImmLike) {
        if (OpToFold.isImm())

      UseDesc.operands()[UseOpIdx].RegClass == -1)

  if (!FoldingImmLike) {
    if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
      if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
      if (!RC || !TRI->isProperlyAlignedRC(*RC))

    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold);

      TRI->getRegClass(FoldDesc.operands()[0].RegClass);

    if (UseOp->getSubReg() == AMDGPU::sub0) {

    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp);

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold);
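// evalBinaryInstruction: constant-fold a 32-bit bitwise/shift opcode given
// both source values, writing the value into Result.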
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:
  case AMDGPU::S_XNOR_B32:
  case AMDGPU::S_NAND_B32:
  case AMDGPU::S_NOR_B32:
  case AMDGPU::S_ANDN2_B32:
    Result = LHS & ~RHS;
  case AMDGPU::S_ORN2_B32:
    Result = LHS | ~RHS;
  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    Result = LHS << (RHS & 31);
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);
  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);
  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);
  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
      !Op.getReg().isVirtual())

  if (Def && Def->isMoveImmediate()) {
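// tryConstantFoldOp: fold instructions whose sources are now known constants:
// NOT of an immediate, and the identity/absorbing cases of OR, AND and XOR
// with 0 or -1, rewriting the instruction or dropping the redundant source
// operand.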
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
  if (!MI->allImplicitDefsAreDead())

  unsigned Opc = MI->getOpcode();

  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
       Opc == AMDGPU::S_NOT_B32) &&
    MI->getOperand(1).ChangeToImmediate(~Src0->getImm());

    bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());

    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->removeOperand(Src1Idx);

  if (!MI->isCommutable())

  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
      MI->removeOperand(Src1Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
      Opc == AMDGPU::S_AND_B32) {
      MI->removeOperand(Src0Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
      Opc == AMDGPU::S_XOR_B32) {
      MI->removeOperand(Src1Idx);
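// tryFoldCndMask: if both data sources of a V_CNDMASK are the same immediate
// and no source modifiers are set, the select is a no-op and is rewritten as a
// plain move of that value.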
  unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
      Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)

  auto *Src0Imm = getImmOrMaterializedImm(*Src0);
  auto *Src1Imm = getImmOrMaterializedImm(*Src1);
  if (!Src1Imm->isIdenticalTo(*Src0Imm))

  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
      (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))

  MI.removeOperand(Src2Idx);

  if (Src1ModIdx != -1)
    MI.removeOperand(Src1ModIdx);
  if (Src0ModIdx != -1)
    MI.removeOperand(Src0ModIdx);
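// tryFoldZeroHighBits: an AND that only clears the high 16 bits is redundant
// when the defining instruction already zeroes those bits, so the AND result
// is replaced with its source.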
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)

  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))

  MRI->replaceRegWith(Dst, Src1);
  if (!MI.getOperand(2).isKill())
    MRI->clearKillFlags(Src1);
  MI.eraseFromParent();
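// foldInstOperand: fold one definition into all of its uses, constant-folding
// users that become fully constant, then apply every recorded FoldCandidate
// (restoring operand order for commuted candidates that fail) and materialize
// any copies that were queued.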
  bool Changed = false;

  if (OpToFold.isImm()) {

      if (tryConstantFoldOp(&UseMI)) {

  for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
  for (auto *U : UsesToProcess) {

  if (CopiesToReplace.empty() && FoldList.empty())

    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    assert(!Fold.isReg() || Fold.OpToFold);
    if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {

      assert(Fold.OpToFold && Fold.OpToFold->isReg());

        MRI->clearKillFlags(Fold.OpToFold->getReg());
                        << static_cast<int>(Fold.UseOpNo) << " of "
    } else if (Fold.Commuted) {
      TII->commuteInstruction(*Fold.UseMI, false);
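// tryFoldFoldableCopy: fold a foldable copy or materialized immediate into its
// uses. Copies into M0 are tracked so a redundant re-set of M0 can be deleted,
// V_MOV_B16 source modifiers are rejected, scalar adds of frame indexes go
// through foldCopyToVGPROfScalarAddOfFrameIndex, and dead copy chains left
// behind are erased.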
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
  if (DstReg == AMDGPU::M0) {
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();

  if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
    if (TII->hasAnyModifiersSet(MI))
    OpToFoldPtr = &MI.getOperand(2);
    OpToFoldPtr = &MI.getOperand(1);

  if (!FoldingImm && !OpToFold.isReg())

  if (OpToFold.isReg() &&
      foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.getReg(), MI))

  bool Changed = foldInstOperand(MI, OpToFold);

  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);
    InstToErase->eraseFromParent();
      InstToErase = nullptr;
    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))

  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();
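// Clamp folding: a V_MAX-style instruction that compares a value with itself
// and has the clamp bit set (with no other source modifiers or omod) acts as
// a pure clamp; tryFoldClamp then applies the clamp bit to the defining
// instruction and removes the max.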
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_t16_e64:
  case AMDGPU::V_MAX_F16_fake16_e64:
  case AMDGPU::V_MAX_F64_e64:
  case AMDGPU::V_MAX_NUM_F64_e64:
  case AMDGPU::V_PK_MAX_F16: {
    if (MI.mayRaiseFPException())

    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())

        Src0->getSubReg() != AMDGPU::NoSubRegister)

    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

        = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
        = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)

  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))

  if (TII->getClampMask(*Def) != TII->getClampMask(MI))

  if (Def->mayRaiseFPException())

  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);
  Register MIDstReg = MI.getOperand(0).getReg();
  if (TRI->isSGPRReg(*MRI, DefReg)) {

  MRI->replaceRegWith(MIDstReg, DefReg);

  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
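// getOModValue: map a multiplier constant (0.5, 2.0 or 4.0 in the operand's
// float width) to the corresponding output-modifier (omod) encoding.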
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64: {
    case 0x3fe0000000000000:
    case 0x4000000000000000:
    case 0x4010000000000000:
  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64: {
    switch (static_cast<uint16_t>(Val)) {
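// isOMod: decide whether MI is a multiply by 0.5/2.0/4.0 (or an equivalent
// add) whose effect can be expressed as an output modifier on the other
// operand; bails out under denormal output modes, FP exceptions, or when any
// source/clamp/omod modifier is already set.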
std::pair<const MachineOperand *, int>
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64:
  case AMDGPU::V_MUL_F16_e64: {
    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
          Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
          Op == AMDGPU::V_MUL_F16_fake16_e64) &&
         MFI->getMode().FP64FP16Denormals.Output !=
        MI.mayRaiseFPException())

    if (Src0->isImm()) {
    } else if (Src1->isImm()) {

        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))

    return std::pair(RegOp, OMod);

  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F64_pseudo_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64:
  case AMDGPU::V_ADD_F16_fake16_e64: {
    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
          Op == AMDGPU::V_ADD_F16_e64 || Op == AMDGPU::V_ADD_F16_t16_e64 ||
          Op == AMDGPU::V_ADD_F16_fake16_e64) &&

        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
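// tryFoldOMod: fold the recognized omod multiply/add into the single
// instruction that defines its register operand, replacing the result
// register and erasing the now-redundant arithmetic.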
  std::tie(RegOp, OMod) = isOMod(MI);
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !MRI->hasOneNonDBGUser(RegOp->getReg()))

  if (Def->mayRaiseFPException())

  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());

  MRI->clearKillFlags(Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
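// tryFoldRegSequence: if a VGPR REG_SEQUENCE is assembled from AGPR inputs and
// its single user can take an AGPR super-class operand, rebuild it with an
// AGPR destination so the copies out of AGPRs become unnecessary.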
  auto Reg = MI.getOperand(0).getReg();

  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
      !MRI->hasOneNonDBGUse(Reg))

  for (auto &[Op, SubIdx] : Defs) {
    if (TRI->isAGPR(*MRI, Op->getReg()))

  if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))

  Op = &*MRI->use_nodbg_begin(Reg);

  if (Op->getSubReg())

      TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF());
  if (!OpRC || !TRI->isVectorSuperClass(OpRC))

  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
  auto Dst = MRI->createVirtualRegister(NewDstRC);
                TII->get(AMDGPU::REG_SEQUENCE), Dst);

  for (auto &[Def, SubIdx] : Defs) {
    Def->setIsKill(false);

  if (!TII->isOperandLegal(*UseMI, OpIdx, Op)) {
    RS->eraseFromParent();

  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
    MI.eraseFromParent();
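// isAGPRCopy: checks whether Copy is an AGPR -> VGPR copy, either directly or
// through one intermediate copy, and reports the AGPR source register and
// subregister.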
                       Register &OutReg, unsigned &OutSubReg) {
  if (TRI.isAGPR(MRI, CopySrcReg)) {
    OutReg = CopySrcReg;

  if (!CopySrcDef || !CopySrcDef->isCopy())

      OtherCopySrc.getSubReg() != AMDGPU::NoSubRegister ||
      !TRI.isAGPR(MRI, OtherCopySrcReg))

  OutReg = OtherCopySrcReg;
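// tryFoldPhiAGPR: turn a VGPR PHI whose incoming values are copies out of
// AGPRs into an AGPR PHI, inserting the required copies in the predecessor
// blocks (V_ACCVGPR_WRITE_B32 in the 32-bit multi-use case on pre-gfx90a
// targets) and a single copy back to the original VGPR result.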
  if (!TRI->isVGPR(*MRI, PhiOut))

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    if (!Copy || !Copy->isCopy())

    unsigned AGPRRegMask = AMDGPU::NoSubRegister;
    if (const auto *SubRC = TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))

  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {

    unsigned CopyOpc = AMDGPU::COPY;
    if (Def->isCopy()) {
      unsigned AGPRSubReg = AMDGPU::NoSubRegister;

    if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
      CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      InsertMBB = Def->getParent();

    Register NewReg = MRI->createVirtualRegister(ARC);
                       TII->get(CopyOpc), NewReg)

  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(0).setReg(NewReg);

          TII->get(AMDGPU::COPY), PhiOut)
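// tryFoldLoad: on subtargets with gfx90a AGPR support, if a load's VGPR result
// is only forwarded through copies/REG_SEQUENCEs into AGPRs, retype the result
// (and the intermediate registers) as AGPRs so no separate move is needed.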
  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)

  while (!Users.empty()) {
    if (!I->isCopy() && !I->isRegSequence())

    Register DstReg = I->getOperand(0).getReg();

    if (TRI->isAGPR(*MRI, DstReg))

      Users.push_back(&U);

  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
  if (!TII->isOperandLegal(MI, 0, &Def)) {
    MRI->setRegClass(DefReg, RC);

  while (!MoveRegs.empty()) {
    MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
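// tryOptimizeAGPRPhis: on targets without gfx90a AGPR operands, when several
// PHIs in a block read the same AGPR (or subregister of it), materialize the
// value once through a VGPR (V_ACCVGPR_READ) and a fresh AGPR copy, and point
// those PHI operands at the new register.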
  if (ST->hasGFX90AInsts())

  for (auto &MI : MBB) {
    if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))

    for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {

  bool Changed = false;
  for (const auto &[Entry, MOs] : RegToMO) {
    if (MOs.size() == 1)

        MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
            TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)

    Register TempAGPR = MRI->createVirtualRegister(ARC);
            TII->get(AMDGPU::COPY), TempAGPR)
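// run: per-function driver. For every instruction it applies the cndmask and
// zero-high-bits folds, the REG_SEQUENCE/PHI/load AGPR rewrites, and the
// foldable-copy fold while tracking the currently known value of M0, then
// runs the clamp fold and the per-block AGPR PHI cleanup.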
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  bool HasNSZ = MFI->hasNoSignedZerosFPMath();

  bool Changed = false;

      Changed |= tryFoldCndMask(MI);

      if (tryFoldZeroHighBits(MI)) {

      if (MI.isRegSequence() && tryFoldRegSequence(MI)) {

      if (MI.isPHI() && tryFoldPhiAGPR(MI)) {

      if (MI.mayLoad() && tryFoldLoad(MI)) {

      if (TII->isFoldableCopy(MI)) {
        Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);

      if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
        CurrentKnownM0Val = nullptr;

      Changed |= tryFoldClamp(MI);

    Changed |= tryOptimizeAGPRPhis(*MBB);

  bool Changed = SIFoldOperandsImpl().run(MF);