#define DEBUG_TYPE "si-fold-operands"
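// A FoldCandidate records one pending fold: the use instruction and operand
// index being rewritten, the value to substitute (immediate, frame index,
// global address, or register), whether the use had to be commuted to make the
// fold legal, and an optional 32-bit opcode to shrink to when the fold is only
// legal in the VOP2 encoding.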
                bool Commuted_ = false,
      UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),

    if (FoldOp->isImm()) {
      ImmToFold = FoldOp->getImm();
    } else if (FoldOp->isFI()) {
      FrameIndexToFold = FoldOp->getIndex();

  bool needsShrink() const { return ShrinkOpcode != -1; }
  getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

                "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F16_t16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;

  return AMDGPU::INSTRUCTION_LIST_END;
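// macToMad: the MAC/FMAC forms read their accumulator through a src2 operand
// that is tied to the destination, so an immediate or SGPR cannot be folded
// into it. Switching to the equivalent MAD/FMA opcode (roughly as mapped
// above) unties src2 and makes such folds possible.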
  if (!OpToFold.isFI())

  const unsigned Opc = UseMI.getOpcode();

  return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);

  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
  return OpNo == VIdx && SIdx == -1;

  return new SIFoldOperands();
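// In broad strokes, updateOperand applies a recorded FoldCandidate to its use:
// the target operand is rewritten to the folded immediate, frame index, global
// address, or register. Along the way it patches the packed 16-bit operand
// modifiers, shrinks carry-out VOP3 adds/subs to their 32-bit encodings when
// the candidate asked for it, and switches folded MFMA users to their
// early-clobber variants where one exists.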
bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {

      AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
                                    ST->hasInv2PiInlineImm())) {
    unsigned Opcode = MI->getOpcode();
    int OpNo = MI->getOperandNo(&Old);

    if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
      ModIdx = AMDGPU::OpName::src0_modifiers;
    else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
      ModIdx = AMDGPU::OpName::src1_modifiers;
    else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
      ModIdx = AMDGPU::OpName::src2_modifiers;

    ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);

    unsigned Val = Mod.getImm();

    switch (TII->get(Opcode).operands()[OpNo].OperandType) {
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
      if (!isUInt<16>(Fold.ImmToFold)) {
        if (!(Fold.ImmToFold & 0xffff)) {

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
    int Op32 = Fold.ShrinkOpcode;

    bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());

    Register NewReg0 = MRI->createVirtualRegister(Dst0RC);

    if (HaveNonDbgCarryUse) {

    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
      MI->removeOperand(I);
    MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));

    TII->commuteInstruction(*Inst32, false);

  assert(!Fold.needsShrink() && "not handled");

    int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(MI->getOpcode());
    if (NewMFMAOpc == -1)
    MI->setDesc(TII->get(NewMFMAOpc));
    MI->untieRegOperand(0);

  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
                   Fold.OpToFold->getTargetFlags());
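// The fold-list helpers below keep the bookkeeping honest: isUseMIInFoldList
// checks whether a use instruction already has a pending candidate, and
// appendFoldCandidate records a new (possibly commuted) candidate together
// with a debug trace of the operand being targeted.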
  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });

  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)

  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
                    << " operand " << OpNo << "\n " << *MI);
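// tryAddToFoldList decides whether a fold is legal on this operand. If the
// operand is rejected outright it tries harder: converting a MAC/FMAC use to
// its MAD/FMA form, switching S_SETREG_B32 to the immediate variant, or
// commuting the instruction (remembering the commute, and a 32-bit shrink
// opcode for the carry-out add/sub cases). SALU users additionally refuse a
// second non-inline literal.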
  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
    unsigned Opc = MI->getOpcode();

    if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {

      MI->setDesc(TII->get(NewOpc));
      bool AddOpSel = !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel) &&
                      AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel);

      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);

        MI->untieRegOperand(OpNo);

        MI->removeOperand(MI->getNumExplicitOperands() - 1);
      MI->setDesc(TII->get(Opc));

    if (OpToFold->isImm()) {

      if (Opc == AMDGPU::S_SETREG_B32)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
      else if (Opc == AMDGPU::S_SETREG_B32_mode)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;

        MI->setDesc(TII->get(ImmOpc));

    unsigned CommuteOpNo = OpNo;

    bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);

      if (CommuteIdx0 == OpNo)
        CommuteOpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        CommuteOpNo = CommuteIdx0;

    if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
                       !MI->getOperand(CommuteIdx1).isReg()))

        !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))

    if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
      if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
           Opc == AMDGPU::V_SUB_CO_U32_e64 ||
           Opc == AMDGPU::V_SUBREV_CO_U32_e64) &&

        unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;

        if (!OtherOp.isReg() ||

        unsigned MaybeCommutedOpc = MI->getOpcode();
        int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);

      TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);

  if (TII->isSALU(MI->getOpcode())) {

    if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {

      for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
        auto &Op = MI->getOperand(i);
        if (OpNo != i && !Op.isReg() && !TII->isInlineConstant(Op, OpInfo))

  return !TII->isSDWA(MI);
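// getRegSeqInit looks through the REG_SEQUENCE (and foldable copies) that
// defines a register and collects each input operand together with the
// sub-register index it initializes, so callers can reason about the value
// lane by lane.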
bool SIFoldOperands::getRegSeqInit(

  if (!Def || !Def->isRegSequence())

  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {

       SubDef = MRI->getVRegDef(Sub->getReg())) {

      if (TII->isInlineConstant(*Op, OpTy))

      if (!Op->isReg() || Op->getReg().isPhysical())

    Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
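// tryToFoldACImm handles operands that accept inline constants directly
// (including the AGPR accumulator operands of MFMA instructions): the fold is
// taken when the source is itself an inline constant, a foldable copy of one,
// or a REG_SEQUENCE whose lanes are all the same inline constant.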
bool SIFoldOperands::tryToFoldACImm(

  if ((OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
       OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) &&
      (OpTy < AMDGPU::OPERAND_REG_INLINE_C_FIRST ||
       OpTy > AMDGPU::OPERAND_REG_INLINE_C_LAST))

  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&

  if (!OpToFold.isReg())

  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {

    if (DefOp.isImm() && TII->isInlineConstant(DefOp, OpTy) &&

  if (!getRegSeqInit(Defs, UseReg, OpTy))

  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {

    auto SubImm = Op->getImm();

      if (!TII->isInlineConstant(*Op, OpTy) ||
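// Roughly, foldOperand is the workhorse: for one use of the value being folded
// it either records a FoldCandidate or rewrites the use in place. Special
// cases include forwarding through REG_SEQUENCE users, folding frame indexes
// into scratch/FLAT-scratch addressing operands, forwarding SGPR-to-vector
// copies to their users (materializing V_ACCVGPR_WRITE sequences for AGPR
// destinations), and rewriting V_READFIRSTLANE/V_READLANE of a known
// immediate into a scalar move.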
void SIFoldOperands::foldOperand(

  if (!isUseSafeToFold(*UseMI, UseOp))

      if (RSUse.getSubReg() != RegSeqDstSubReg)

      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,

    if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
        MFI->getScratchRSrcReg())

        *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);

        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) &&
        !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::saddr)) {
      unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc);

  bool FoldingImmLike =

    if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {

      for (auto &Use : MRI->use_nodbg_operands(DestReg)) {

        if (Use.isImplicit())

                               Use.getParent()->getOperandNo(&Use),

      for (auto &F : CopyUses) {
        foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList,

    if (DestRC == &AMDGPU::AGPR_32RegClass &&
        TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)

      while (ImpOpI != ImpOpE) {

        getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {

      for (unsigned I = 0; I < Size / 4; ++I) {

            TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
          int64_t Imm = Def->getImm();

          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
        } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
          Def->setIsKill(false);
          if (!SeenAGPRs.insert(Src)) {

            Def->setIsKill(false);

          if (TRI->isSGPRReg(*MRI, Src.Reg)) {

          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);

        if (CopyToVGPR.Reg) {

          if (VGPRCopies.count(CopyToVGPR)) {
            Vgpr = VGPRCopies[CopyToVGPR];

            Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

            VGPRCopies[CopyToVGPR] = Vgpr;

          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);

        B.addImm(Defs[I].second);

    else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))

    else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&

  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
      (UseOpc == AMDGPU::V_READLANE_B32 &&
       AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {

    if (FoldingImmLike) {

      if (OpToFold.isImm())

  if (!FoldingImmLike) {
    if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {

      if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {

        if (!RC || !TRI->isProperlyAlignedRC(*RC))

  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {

    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
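// evalBinaryInstruction constant-folds the 32-bit bitwise and shift opcodes
// below when both sources are known, writing the value into Result. Shift
// amounts are masked to 5 bits, matching the hardware; e.g. S_LSHL_B32 with
// LHS = 3 and RHS = 4 yields Result = 3 << (4 & 31) = 48, and the *REV forms
// simply swap which operand supplies the shift amount.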
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:

  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:

  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:

  case AMDGPU::S_XNOR_B32:

  case AMDGPU::S_NAND_B32:

  case AMDGPU::S_NOR_B32:

  case AMDGPU::S_ANDN2_B32:

  case AMDGPU::S_ORN2_B32:

  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    Result = LHS << (RHS & 31);
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);
  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);
  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);

  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
      !Op.getReg().isVirtual())

  if (Def && Def->isMoveImmediate()) {
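// tryConstantFoldOp simplifies an instruction whose sources are known
// constants (possibly looked through mov-immediates, as above): NOT of an
// immediate is evaluated directly, two-constant cases go through
// evalBinaryInstruction, and identities such as x | 0, x & -1 and x ^ 0 are
// reduced to plain copies, while x | -1 and x & 0 collapse to a constant.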
  unsigned Opc = MI->getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
       Opc == AMDGPU::S_NOT_B32) &&
    MI->getOperand(1).ChangeToImmediate(~Src0->getImm());

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

    bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());

    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->removeOperand(Src1Idx);

  if (!MI->isCommutable())

  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {

      MI->removeOperand(Src1Idx);
    } else if (Src1Val == -1) {

      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
      Opc == AMDGPU::S_AND_B32) {

      MI->removeOperand(Src0Idx);
    } else if (Src1Val == -1) {

      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
      Opc == AMDGPU::S_XOR_B32) {

      MI->removeOperand(Src1Idx);
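// tryFoldCndMask: a V_CNDMASK whose two data sources are the same value (and
// which carries no source modifiers) selects that value regardless of the
// condition, so the select and its mask operand can be dropped in favour of a
// plain move of the common source.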
  unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
      Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)

  auto *Src0Imm = getImmOrMaterializedImm(*Src0);
  auto *Src1Imm = getImmOrMaterializedImm(*Src1);
  if (!Src1Imm->isIdenticalTo(*Src0Imm))

      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
      (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))

  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  MI.removeOperand(Src2Idx);
  MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
  if (Src1ModIdx != -1)
    MI.removeOperand(Src1ModIdx);
  if (Src0ModIdx != -1)
    MI.removeOperand(Src0ModIdx);
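// tryFoldZeroHighBits: a V_AND_B32 that masks with 0xffff is redundant when
// the other source is produced by an instruction already known to zero the
// high 16 bits of its destination, so the AND can be erased and its users
// rewritten to read the source directly.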
bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)

  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))

  MI.eraseFromParent();
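// foldInstOperand drives the folding of one definition: it first tries
// straight constant folding on each user, then visits every non-debug use to
// collect FoldCandidates, materializes any required copies, and finally
// applies the candidates with updateOperand, undoing the commute on the ones
// that fail.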
  bool Changed = false;

  if (OpToFold.isImm()) {

      if (tryConstantFoldOp(&UseMI)) {

  for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))

  for (auto *U : UsesToProcess) {

  if (CopiesToReplace.empty() && FoldList.empty())

    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    assert(!Fold.isReg() || Fold.OpToFold);
    if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {

      assert(Fold.OpToFold && Fold.OpToFold->isReg());

      MRI->clearKillFlags(Fold.OpToFold->getReg());

                        << static_cast<int>(Fold.UseOpNo) << " of "
    } else if (Fold.Commuted) {

      TII->commuteInstruction(*Fold.UseMI, false);
bool SIFoldOperands::tryFoldFoldableCopy(

  if (MI.getOperand(0).getReg() == AMDGPU::M0) {

    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();

  if (!FoldingImm && !OpToFold.isReg())

  if (!MI.getOperand(0).getReg().isVirtual())

  bool Changed = foldInstOperand(MI, OpToFold);

  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);

    InstToErase->eraseFromParent();

      InstToErase = nullptr;

    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))

  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();
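// Clamp folding: a V_MAX (or V_PK_MAX_F16) whose two sources are the same
// register and whose clamp bit is set is just "clamp x". When its input has a
// single use and the defining instruction can carry a clamp bit of its own,
// the bit is set on the definition and the max is deleted; if the definition
// had to be converted to its three-address form to expose the clamp operand,
// the old form is erased as well.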
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_t16_e64:
  case AMDGPU::V_MAX_F64_e64:
  case AMDGPU::V_PK_MAX_F16: {
    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())

        Src0->getSubReg() != AMDGPU::NoSubRegister)

    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

        = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
        = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)

  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))

  if (TII->getClampMask(*Def) != TII->getClampMask(MI))

  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
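// getOModValue maps a multiplication constant onto an output-modifier
// encoding. The 64-bit cases compare against the raw double bit patterns:
// 0x3fe0000000000000 is 0.5 (omod div:2), 0x4000000000000000 is 2.0
// (omod mul:2) and 0x4010000000000000 is 4.0 (omod mul:4); the F32 and F16
// variants do the same after casting the literal down to 32 or 16 bits.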
  case AMDGPU::V_MUL_F64_e64: {

    case 0x3fe0000000000000:
    case 0x4000000000000000:
    case 0x4010000000000000:

  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {

  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_t16_e64: {
    switch (static_cast<uint16_t>(Val)) {
std::pair<const MachineOperand *, int>

  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_e64: {

    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
          Op == AMDGPU::V_MUL_F16_t16_e64) &&

    if (Src0->isImm()) {
    } else if (Src1->isImm()) {

        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))

    return std::pair(RegOp, OMod);

  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64: {

    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
          Op == AMDGPU::V_ADD_F16_t16_e64) &&

        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
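// tryFoldOMod applies what isOMod found: when the multiplied/added value has
// a single non-debug user and its defining instruction supports output
// modifiers (and has no clamp set), the omod field is written on the
// definition and the multiply/add is removed, again converting a tied
// two-address def to its three-address form if that is what exposes the
// modifier.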
  std::tie(RegOp, OMod) = isOMod(MI);
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !MRI->hasOneNonDBGUser(RegOp->getReg()))

  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
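// Broadly, tryFoldRegSequence (gfx90a and later): when a REG_SEQUENCE
// assembled from AGPR inputs lands in a VGPR with a single user that accepts
// a vector super-class (AV) operand, the sequence is rebuilt with an AGPR
// result class so the per-lane AGPR-to-VGPR copies become unnecessary.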
  auto Reg = MI.getOperand(0).getReg();

  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
      !MRI->hasOneNonDBGUse(Reg))

  for (auto &Def : Defs) {
    const auto *Op = Def.first;

    if (TRI->isAGPR(*MRI, Op->getReg()))

    if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))

  Op = &*MRI->use_nodbg_begin(Reg);

  if (Op->getSubReg())

      TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF());
  if (!OpRC || !TRI->isVectorSuperClass(OpRC))

  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
  auto Dst = MRI->createVirtualRegister(NewDstRC);
                TII->get(AMDGPU::REG_SEQUENCE), Dst);

  for (unsigned I = 0; I < Defs.size(); ++I) {

      Def->setIsKill(false);

    RS.addImm(Defs[I].second);

  if (!TII->isOperandLegal(*UseMI, OpIdx, Op)) {
    RS->eraseFromParent();

  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
    MI.eraseFromParent();
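// tryFoldLCSSAPhi: a single-input PHI whose VGPR input is just a copy of an
// AGPR can be rewritten to flow the AGPR through the PHI itself, with one
// copy back to the original VGPR class after the PHI. That keeps the value in
// AGPRs across the block boundary instead of bouncing through a VGPR.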
  if (PHI.getNumExplicitOperands() != 3)

  if (PHI.getOperand(1).getSubReg() ||
      !TRI->isVGPR(*MRI, PhiIn) || !TRI->isVGPR(*MRI, PhiOut))

  if (!MRI->hasOneNonDBGUse(PhiIn))

  if (!Copy || !Copy->isCopy())

  if (!TRI->isAGPR(*MRI, CopyIn) || Copy->getOperand(1).getSubReg())

  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(1).setReg(CopyIn);
  PHI.getOperand(0).setReg(NewReg);

          TII->get(AMDGPU::COPY), PhiOut)

  Copy->eraseFromParent();
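// tryFoldLoad (also gfx90a+): if a load defines a VGPR whose value only ever
// reaches AGPRs through copies and REG_SEQUENCEs, the destination (and the
// intermediate registers) are simply re-classed to AGPR so the load writes
// the accumulator registers directly, provided the load remains legal with
// the new register class.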
  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)

  while (!Users.empty()) {

    if (!I->isCopy() && !I->isRegSequence())
    Register DstReg = I->getOperand(0).getReg();

    if (TRI->isAGPR(*MRI, DstReg))

      Users.push_back(&U);

  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
  if (!TII->isOperandLegal(MI, 0, &Def)) {
    MRI->setRegClass(DefReg, RC);

  while (!MoveRegs.empty()) {
    MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
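// runOnMachineFunction walks every instruction, trying the individual folds
// above in turn: cndmask simplification, redundant 16-bit masking, AGPR
// REG_SEQUENCE and PHI rewrites, load re-classing, foldable copies (with the
// last known M0 value tracked so duplicate M0 writes can be dropped), and
// finally omod and clamp folding.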
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  bool HasNSZ = MFI->hasNoSignedZerosFPMath();

  bool Changed = false;

      Changed |= tryFoldCndMask(MI);

      if (tryFoldZeroHighBits(MI)) {

      if (MI.isRegSequence() && tryFoldRegSequence(MI)) {

      if (MI.isPHI() && tryFoldLCSSAPhi(MI)) {

      if (MI.mayLoad() && tryFoldLoad(MI)) {

      if (TII->isFoldableCopy(MI)) {
        Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);

      if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
        CurrentKnownM0Val = nullptr;

        Changed |= tryFoldClamp(MI);