#define DEBUG_TYPE "si-fold-operands"
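
// si-fold-operands: a machine-IR peephole pass that folds immediates, frame
// indexes, global addresses, and copied registers directly into their uses so
// the intermediate movs/copies can be erased.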
                bool Commuted_ = false,
      UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),

    if (FoldOp->isImm()) {
      ImmToFold = FoldOp->getImm();
    } else if (FoldOp->isFI()) {
      FrameIndexToFold = FoldOp->getIndex();

  bool needsShrink() const { return ShrinkOpcode != -1; }
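
// SIFoldOperandsImpl carries the per-function state (MRI, TII, TRI, ST, MFI)
// and implements the individual folding transforms that run() drives.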
class SIFoldOperandsImpl {
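  // Map a scalar ALU opcode to its VALU equivalent; UseVOP3 selects the e64
  // encoding. Returns INSTRUCTION_LIST_END when there is no mapping.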
  unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
    switch (Opc) {
    case AMDGPU::S_ADD_I32: {
      if (ST->hasAddNoCarry())
        return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
      return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
    }
    case AMDGPU::S_OR_B32:
      return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
    case AMDGPU::S_AND_B32:
      return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
    case AMDGPU::S_MUL_I32:
      return AMDGPU::V_MUL_LO_U32_e64;
    default:
      return AMDGPU::INSTRUCTION_LIST_END;
    }
  }

  bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
                                             MachineInstr &MI) const;

  bool canUseImmWithOpSel(FoldCandidate &Fold) const;
  bool tryFoldImmWithOpSel(FoldCandidate &Fold) const;

  bool getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
                     Register UseReg, uint8_t OpTy) const;

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

  SIFoldOperandsImpl() = default;

    return SIFoldOperandsImpl().run(MF);

char SIFoldOperandsLegacy::ID = 0;

          TRI.getSubRegisterClass(RC, MO.getSubReg()))

static unsigned macToMad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  }
  return AMDGPU::INSTRUCTION_LIST_END;
}
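
// frameIndexMayFold: can a frame-index operand be folded directly into this
// use? True for the add/or/and forms below when the other operand is an
// immediate, and for the address-operand cases checked via VIdx/SIdx at the
// end.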
bool SIFoldOperandsImpl::frameIndexMayFold(
  if (!OpToFold.isFI())

  const unsigned Opc = UseMI.getOpcode();
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::S_AND_B32:
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:
    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&

  return OpNo == VIdx && SIdx == -1;
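
// When a VGPR copy reads an SGPR whose only use is the copy and whose def is a
// foldable scalar op, rewrite that def to the VALU form picked by
// convertToVALUOp() and erase both the def and the copy.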
bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
  if (TRI->isVGPR(*MRI, DstReg) && TRI->isSGPRReg(*MRI, SrcReg) &&
      MRI->hasOneNonDBGUse(SrcReg)) {
    if (!Def || Def->getNumOperands() != 4)

    const bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
    unsigned NewOp = convertToVALUOp(Def->getOpcode(), UseVOP3);
    if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
        !Def->getOperand(3).isDead())

    if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
      if (Add->getDesc().getNumDefs() == 2) {
        Register CarryOutReg = MRI->createVirtualRegister(TRI->getBoolRC());
        MRI->setRegAllocationHint(CarryOutReg, 0, TRI->getVCC());

      Add.add(*Src0).add(*Src1).setMIFlags(Def->getFlags());
      Def->eraseFromParent();
      MI.eraseFromParent();

    assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);

      Def->eraseFromParent();
      MI.eraseFromParent();

  return new SIFoldOperandsLegacy();
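
// canUseImmWithOpSel / tryFoldImmWithOpSel handle immediates destined for
// packed (op_sel) operands: the value is examined as 16-bit halves and the
// op_sel source modifiers are rewritten so an inline-constant encoding can be
// used; for V_PK_ADD_U16 / V_PK_SUB_U16 the opcode may be flipped to fold the
// negated immediate instead.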
bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
  const uint64_t TSFlags = MI->getDesc().TSFlags;

  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

  unsigned SrcIdx = ~0;
    ModIdx = AMDGPU::OpName::src0_modifiers;
    ModIdx = AMDGPU::OpName::src1_modifiers;
    ModIdx = AMDGPU::OpName::src2_modifiers;

  unsigned ModVal = Mod.getImm();

      Mod.setImm(NewModVal);

    if (static_cast<int16_t>(Lo) < 0) {
      int32_t SExt = static_cast<int16_t>(Lo);

        Mod.setImm(NewModVal);

  if (tryFoldToInline(Imm))

  bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
  bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
  if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
    bool Clamp = MI->getOperand(ClampIdx).getImm() != 0;

    if (tryFoldToInline(NegImm)) {
          IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
      MI->setDesc(TII->get(NegOpcode));
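
// updateOperand applies a queued FoldCandidate: rewrite the use operand to the
// folded immediate / frame index / global address or register, shrinking the
// user to its 32-bit encoding or re-commuting it when the fold requires that.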
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
  if (Fold.isImm() && canUseImmWithOpSel(Fold)) {
    if (tryFoldImmWithOpSel(Fold))

  int OpNo = MI->getOperandNo(&Old);
  if (!TII->isOperandLegal(*MI, OpNo, &New))

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
    int Op32 = Fold.ShrinkOpcode;

    bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());

    Register NewReg0 = MRI->createVirtualRegister(Dst0RC);

    if (HaveNonDbgCarryUse) {

    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
      MI->removeOperand(I);
    MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));

      TII->commuteInstruction(*Inst32, false);

  assert(!Fold.needsShrink() && "not handled");

    if (NewMFMAOpc == -1)
    MI->setDesc(TII->get(NewMFMAOpc));
    MI->untieRegOperand(0);

  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
                   Fold.OpToFold->getTargetFlags());

  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });

  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)

  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
                    << " operand " << OpNo << "\n " << *MI);
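
// tryAddToFoldList decides whether OpToFold can legally be folded into operand
// OpNo of MI, possibly after mutating MI (MAC -> MAD, S_FMAC -> S_FMAAK/FMAMK,
// S_SETREG -> the IMM32 form) or commuting it, and appends a FoldCandidate on
// success.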
bool SIFoldOperandsImpl::tryAddToFoldList(
  const unsigned Opc = MI->getOpcode();

  auto tryToFoldAsFMAAKorMK = [&]() {
    if (!OpToFold->isImm())

    const bool TryAK = OpNo == 3;
    const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
    MI->setDesc(TII->get(NewOpc));

    bool FoldAsFMAAKorMK =
        tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
    if (FoldAsFMAAKorMK) {
      MI->untieRegOperand(3);

    MI->setDesc(TII->get(Opc));

  bool IsLegal = TII->isOperandLegal(*MI, OpNo, OpToFold);
  if (!IsLegal && OpToFold->isImm()) {
    FoldCandidate Fold(MI, OpNo, OpToFold);
    IsLegal = canUseImmWithOpSel(Fold);

    if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
      MI->setDesc(TII->get(NewOpc));

      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
        MI->untieRegOperand(OpNo);

      MI->removeOperand(MI->getNumExplicitOperands() - 1);
      MI->setDesc(TII->get(Opc));

    if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
      if (tryToFoldAsFMAAKorMK())

    if (OpToFold->isImm()) {
      if (Opc == AMDGPU::S_SETREG_B32)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
      else if (Opc == AMDGPU::S_SETREG_B32_mode)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;

        MI->setDesc(TII->get(ImmOpc));

    bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);

    if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())

    if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))

    if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
      if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
           Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
        TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);

      if (!OtherOp.isReg() ||

    unsigned MaybeCommutedOpc = MI->getOpcode();

  if ((Opc == AMDGPU::S_FMAAK_F32 || Opc == AMDGPU::S_FMAMK_F32) &&
      !OpToFold->isReg() && !TII->isInlineConstant(*OpToFold)) {
    unsigned ImmIdx = Opc == AMDGPU::S_FMAAK_F32 ? 3 : 2;
    if (!OpImm.isReg() &&
        TII->isInlineConstant(*MI, MI->getOperand(OpNo), OpImm))
      return tryToFoldAsFMAAKorMK();

  if (Opc == AMDGPU::S_FMAC_F32 &&
      (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
    if (tryToFoldAsFMAAKorMK())

  if (TII->isSALU(MI->getOpcode())) {
    if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {
      for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
        auto &Op = MI->getOperand(i);
        if (OpNo != i && !Op.isReg() &&

  return !TII->isSDWA(MI);
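
// getRegSeqInit collects the operands that initialize a REG_SEQUENCE def as
// (operand, sub-register index) pairs, looking through copies of immediates.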
bool SIFoldOperandsImpl::getRegSeqInit(
  if (!Def || !Def->isRegSequence())

  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
         SubDef = MRI->getVRegDef(Sub->getReg())) {
      if (TII->isInlineConstant(*Op, OpTy))

      if (!Op->isReg() || Op->getReg().isPhysical())

    Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
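
// tryToFoldACImm: fold an inline constant (either the operand itself or the
// value behind a foldable copy / REG_SEQUENCE init) into an operand that
// accepts it.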
bool SIFoldOperandsImpl::tryToFoldACImm(
  if (UseOpIdx >= Desc.getNumOperands())

  uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
      TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {

  if (!OpToFold.isReg())

  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
    if (DefOp.isImm() && TII->isInlineConstant(DefOp, OpTy) &&
        TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {

  if (!getRegSeqInit(Defs, UseReg, OpTy))

  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
    auto SubImm = Op->getImm();

    if (!TII->isInlineConstant(*Op, OpTy) ||
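
// foldOperand is the central dispatch for one (def, use) pair: it recurses
// through REG_SEQUENCE uses, handles frame-index and AGPR special cases, and
// otherwise queues the fold through tryAddToFoldList().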
void SIFoldOperandsImpl::foldOperand(
  if (!isUseSafeToFold(*UseMI, *UseOp))

    for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))

    for (auto *RSUse : UsesToProcess) {
      if (RSUse->getSubReg() != RegSeqDstSubReg)

      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,

  if (tryToFoldACImm(OpToFold, UseMI, UseOpIdx, FoldList))

  if (frameIndexMayFold(*UseMI, UseOpIdx, OpToFold)) {
    if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
        MFI->getScratchRSrcReg())

        *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);

  bool FoldingImmLike =

    if (DestRC == &AMDGPU::AGPR_32RegClass &&

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)

    while (ImpOpI != ImpOpE) {

    if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {

      for (unsigned I = 0; I < Size / 4; ++I) {
          int64_t Imm = Def->getImm();
          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                       TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
        } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
          Def->setIsKill(false);
          if (!SeenAGPRs.insert(Src)) {

          Def->setIsKill(false);

          if (TRI->isSGPRReg(*MRI, Src.Reg)) {

          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);

        if (CopyToVGPR.Reg) {
          if (VGPRCopies.count(CopyToVGPR)) {
            Vgpr = VGPRCopies[CopyToVGPR];
            Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
            VGPRCopies[CopyToVGPR] = Vgpr;
          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                       TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);

        B.addImm(Defs[I].second);

    if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
        (UseOpc == AMDGPU::V_READLANE_B32 &&

      if (FoldingImmLike) {
        if (OpToFold.isImm())

      UseDesc.operands()[UseOpIdx].RegClass == -1)

  if (!FoldingImmLike) {
    if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
      if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
        if (!RC || !TRI->isProperlyAlignedRC(*RC))

    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold);

      TRI->getRegClass(FoldDesc.operands()[0].RegClass);

    if (UseOp->getSubReg() == AMDGPU::sub0) {

    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp);

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold);
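
// evalBinaryInstruction: constant-fold a two-source 32-bit ALU op; shift
// amounts are masked to five bits to match the hardware behaviour.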
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:
  case AMDGPU::S_XNOR_B32:
  case AMDGPU::S_NAND_B32:
  case AMDGPU::S_NOR_B32:
  case AMDGPU::S_ANDN2_B32:
    Result = LHS & ~RHS;
  case AMDGPU::S_ORN2_B32:
    Result = LHS | ~RHS;
  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    Result = LHS << (RHS & 31);
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);
  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);
  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);

  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
      !Op.getReg().isVirtual())

  if (Def && Def->isMoveImmediate()) {
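
// tryConstantFoldOp folds instructions whose sources are now constant: NOT of
// an immediate, fully constant binary ops (via evalBinaryInstruction), and
// identities such as or/xor with 0, and with -1, and so on.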
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
  if (!MI->allImplicitDefsAreDead())

  unsigned Opc = MI->getOpcode();

  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
       Opc == AMDGPU::S_NOT_B32) &&
    MI->getOperand(1).ChangeToImmediate(~Src0->getImm());

    bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());

    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->removeOperand(Src1Idx);

  if (!MI->isCommutable())

  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
      MI->removeOperand(Src1Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
      Opc == AMDGPU::S_AND_B32) {
      MI->removeOperand(Src0Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
      Opc == AMDGPU::S_XOR_B32) {
      MI->removeOperand(Src1Idx);
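
// tryFoldCndMask: when both sources of a V_CNDMASK select the same value, the
// select is a no-op, so the condition and modifier operands are dropped and
// the instruction becomes a plain move of that value.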
  unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
      Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)

  auto *Src0Imm = getImmOrMaterializedImm(*Src0);
  auto *Src1Imm = getImmOrMaterializedImm(*Src1);
  if (!Src1Imm->isIdenticalTo(*Src0Imm))

  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
      (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))

  MI.removeOperand(Src2Idx);
  if (Src1ModIdx != -1)
    MI.removeOperand(Src1ModIdx);
  if (Src0ModIdx != -1)
    MI.removeOperand(Src0ModIdx);
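
// tryFoldZeroHighBits: an AND that only keeps the low 16 bits is redundant
// when the instruction defining the other operand already zeroes the high
// half, so forward the source register and delete the AND.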
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)

  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))

  MRI->replaceRegWith(Dst, Src1);
  if (!MI.getOperand(2).isKill())
    MRI->clearKillFlags(Src1);
  MI.eraseFromParent();

  bool Changed = false;

  if (OpToFold.isImm()) {
      if (tryConstantFoldOp(&UseMI)) {

  for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
  for (auto *U : UsesToProcess) {

  if (CopiesToReplace.empty() && FoldList.empty())

    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    assert(!Fold.isReg() || Fold.OpToFold);
    if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {

      assert(Fold.OpToFold && Fold.OpToFold->isReg());

      MRI->clearKillFlags(Fold.OpToFold->getReg());
                 << static_cast<int>(Fold.UseOpNo) << " of "
    } else if (Fold.Commuted) {
      TII->commuteInstruction(*Fold.UseMI, false);
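
// tryFoldFoldableCopy: entry point for folding a mov/copy's source into its
// uses. It also tracks the last value written to M0 so redundant M0 writes can
// be erased, and cleans up copies that became dead after folding.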
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
  if (DstReg == AMDGPU::M0) {
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();

  if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
    if (TII->hasAnyModifiersSet(MI))
    OpToFoldPtr = &MI.getOperand(2);
    OpToFoldPtr = &MI.getOperand(1);

  if (!FoldingImm && !OpToFold.isReg())

  if (OpToFold.isReg() &&
      foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.getReg(), MI))

  bool Changed = foldInstOperand(MI, OpToFold);

  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);
    InstToErase->eraseFromParent();
      InstToErase = nullptr;
    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))

  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();
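
// isClamp / tryFoldClamp: recognize a max of a value with itself whose only
// effect is the clamp bit, and fold that clamp into the instruction defining
// the source, deleting the max.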
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_t16_e64:
  case AMDGPU::V_MAX_F16_fake16_e64:
  case AMDGPU::V_MAX_F64_e64:
  case AMDGPU::V_MAX_NUM_F64_e64:
  case AMDGPU::V_PK_MAX_F16: {
    if (MI.mayRaiseFPException())

    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())

        Src0->getSubReg() != AMDGPU::NoSubRegister)

    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

        = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
        = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)

  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))

  if (TII->getClampMask(*Def) != TII->getClampMask(MI))

  if (Def->mayRaiseFPException())

  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);

  Register MIDstReg = MI.getOperand(0).getReg();
  if (TRI->isSGPRReg(*MRI, DefReg)) {

  MRI->replaceRegWith(MIDstReg, DefReg);

  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
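
// Output-modifier folding: getOModValue maps the multiplier constants 0.5,
// 2.0, and 4.0 to the corresponding omod encodings.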
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64: {
    case 0x3fe0000000000000:
    case 0x4000000000000000:
    case 0x4010000000000000:
  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64: {
    switch (static_cast<uint16_t>(Val)) {
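
// isOMod checks whether MI is a multiply (or an add of a value with itself) by
// such a constant that could instead be expressed through the omod field of
// the instruction defining its source; it returns that register operand and
// the omod value to use.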
std::pair<const MachineOperand *, int>
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64:
  case AMDGPU::V_MUL_F16_e64: {
    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
          Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
          Op == AMDGPU::V_MUL_F16_fake16_e64) &&
         MFI->getMode().FP64FP16Denormals.Output !=
        MI.mayRaiseFPException())

    if (Src0->isImm()) {

    } else if (Src1->isImm()) {

        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))

    return std::pair(RegOp, OMod);

  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F64_pseudo_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64:
  case AMDGPU::V_ADD_F16_fake16_e64: {
    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
          Op == AMDGPU::V_ADD_F16_e64 || Op == AMDGPU::V_ADD_F16_t16_e64 ||
          Op == AMDGPU::V_ADD_F16_fake16_e64) &&

        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
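
// tryFoldOMod applies the result of isOMod(): set the output modifier on the
// defining instruction, replace the register, and erase the multiply/add.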
  std::tie(RegOp, OMod) = isOMod(MI);
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !MRI->hasOneNonDBGUser(RegOp->getReg()))

  if (Def->mayRaiseFPException())

  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());

  MRI->clearKillFlags(Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
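
// tryFoldRegSequence: on subtargets with gfx90a instructions, a REG_SEQUENCE
// built from AGPR inputs that feeds a single use able to take an AGPR
// super-register is rebuilt with an AGPR register class so the intermediate
// VGPR disappears.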
  auto Reg = MI.getOperand(0).getReg();

  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
      !MRI->hasOneNonDBGUse(Reg))

  for (auto &[Op, SubIdx] : Defs) {
    if (TRI->isAGPR(*MRI, Op->getReg()))

  if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))

  Op = &*MRI->use_nodbg_begin(Reg);

  if (Op->getSubReg())

      TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF());
  if (!OpRC || !TRI->isVectorSuperClass(OpRC))

  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
  auto Dst = MRI->createVirtualRegister(NewDstRC);
                TII->get(AMDGPU::REG_SEQUENCE), Dst);

  for (auto &[Def, SubIdx] : Defs) {
    Def->setIsKill(false);

  if (!TII->isOperandLegal(*UseMI, OpIdx, Op)) {
    RS->eraseFromParent();

  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
    MI.eraseFromParent();
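
// isAGPRCopy: check whether Copy is an AGPR -> VGPR copy, possibly through one
// intermediate copy, and report the AGPR source register and sub-register.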
                       Register &OutReg, unsigned &OutSubReg) {
  if (TRI.isAGPR(MRI, CopySrcReg)) {
    OutReg = CopySrcReg;

  if (!CopySrcDef || !CopySrcDef->isCopy())

      OtherCopySrc.getSubReg() != AMDGPU::NoSubRegister ||
      !TRI.isAGPR(MRI, OtherCopySrcReg))

  OutReg = OtherCopySrcReg;
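
// tryFoldPhiAGPR: if every incoming value of a VGPR PHI is really an AGPR
// copied into a VGPR, move the PHI itself into AGPRs and copy out once instead
// of copying every input.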
  if (!TRI->isVGPR(*MRI, PhiOut))

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    if (!Copy || !Copy->isCopy())

    unsigned AGPRRegMask = AMDGPU::NoSubRegister;
    if (const auto *SubRC = TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))

  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    unsigned CopyOpc = AMDGPU::COPY;

    if (Def->isCopy()) {
      unsigned AGPRSubReg = AMDGPU::NoSubRegister;

    if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
      CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

    InsertMBB = Def->getParent();

    Register NewReg = MRI->createVirtualRegister(ARC);
                   TII->get(CopyOpc), NewReg)

  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(0).setReg(NewReg);

          TII->get(AMDGPU::COPY), PhiOut)
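
// tryFoldLoad: if a loaded VGPR value is only copied into AGPRs, switch the
// load's destination register class to AGPR (when the load accepts it) and
// update the registers along the copy chain.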
  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)

  while (!Users.empty()) {
    if (!I->isCopy() && !I->isRegSequence())

    Register DstReg = I->getOperand(0).getReg();

    if (TRI->isAGPR(*MRI, DstReg))

      Users.push_back(&U);

  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
  if (!TII->isOperandLegal(MI, 0, &Def)) {
    MRI->setRegClass(DefReg, RC);

  while (!MoveRegs.empty()) {
    MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
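
// tryOptimizeAGPRPhis (skipped when gfx90a instructions are available): when
// the same AGPR feeds several PHI operands, read it once into a VGPR temporary
// and copy that back into a fresh AGPR, rewiring the PHIs to the temporary to
// avoid repeated expensive AGPR copies.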
  if (ST->hasGFX90AInsts())

  for (auto &MI : MBB) {
    if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))

    for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {

  bool Changed = false;
  for (const auto &[Entry, MOs] : RegToMO) {
    if (MOs.size() == 1)

        MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
            TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)

    Register TempAGPR = MRI->createVirtualRegister(ARC);
            TII->get(AMDGPU::COPY), TempAGPR)
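
// run() walks every instruction of the function, applying the folds above; the
// cached M0 value is invalidated whenever M0 is redefined, and
// tryOptimizeAGPRPhis runs once per basic block.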
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  bool HasNSZ = MFI->hasNoSignedZerosFPMath();

  bool Changed = false;
      Changed |= tryFoldCndMask(MI);

      if (tryFoldZeroHighBits(MI)) {

      if (MI.isRegSequence() && tryFoldRegSequence(MI)) {

      if (MI.isPHI() && tryFoldPhiAGPR(MI)) {

      if (MI.mayLoad() && tryFoldLoad(MI)) {

      if (TII->isFoldableCopy(MI)) {
        Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);

      if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
        CurrentKnownM0Val = nullptr;

        Changed |= tryFoldClamp(MI);

    Changed |= tryOptimizeAGPRPhis(*MBB);

  bool Changed = SIFoldOperandsImpl().run(MF);