84#include "llvm/IR/IntrinsicsAMDGPU.h"
86#define GET_TARGET_REGBANK_IMPL
87#include "AMDGPUGenRegisterBank.inc"
90#include "AMDGPUGenRegisterBankInfo.def"
93using namespace MIPatternMatch;
109 :
B(B), RBI(RBI_),
MRI(MRI_), NewBank(RB) {
110 assert(!B.isObservingChanges());
111 B.setChangeObserver(*
this);
114 ~ApplyRegBankMapping() {
118 B.stopObservingChanges();
123 const unsigned Opc =
MI.getOpcode();
124 if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
125 Opc == AMDGPU::G_SEXT) {
132 if (SrcBank == &AMDGPU::VCCRegBank) {
136 assert(NewBank == &AMDGPU::VGPRRegBank);
140 B.setInsertPt(*
MI.getParent(),
MI);
142 auto True = B.buildConstant(
S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
143 auto False = B.buildConstant(
S32, 0);
144 B.buildSelect(DstReg, SrcReg, True, False);
145 MRI.setRegBank(True.getReg(0), *NewBank);
146 MRI.setRegBank(False.getReg(0), *NewBank);
147 MI.eraseFromParent();
150 assert(!
MRI.getRegClassOrRegBank(DstReg));
151 MRI.setRegBank(DstReg, *NewBank);
156 if (Opc == AMDGPU::G_TRUNC) {
159 assert(DstBank != &AMDGPU::VCCRegBank);
169 if (Reg.isPhysical() ||
MRI.getRegClassOrRegBank(Reg))
174 assert(NewBank == &AMDGPU::VGPRRegBank &&
175 "s1 operands should only be used for vector bools");
176 assert((
MI.getOpcode() != AMDGPU::G_TRUNC &&
177 MI.getOpcode() != AMDGPU::G_ANYEXT) &&
178 "not expecting legalization artifacts here");
179 RB = &AMDGPU::VCCRegBank;
182 MRI.setRegBank(Reg, *RB);
205 : Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
206 TII(Subtarget.getInstrInfo()) {
211 static auto InitializeRegisterBankOnce = [
this]() {
213 &
getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
214 &
getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
218 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
222 unsigned BankID = Bank.
getID();
223 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
227 return RB != &AMDGPU::SGPRRegBank;
234 if (Dst.getID() == AMDGPU::SGPRRegBankID &&
236 return std::numeric_limits<unsigned>::max();
247 (Dst.getID() == AMDGPU::SGPRRegBankID) &&
249 Src.getID() == AMDGPU::SGPRRegBankID ||
250 Src.getID() == AMDGPU::VCCRegBankID))
251 return std::numeric_limits<unsigned>::max();
254 if (Dst.getID() == AMDGPU::AGPRRegBankID &&
255 Src.getID() == AMDGPU::AGPRRegBankID)
289 if (&RC == &AMDGPU::SReg_1RegClass)
290 return AMDGPU::VCCRegBank;
299 return AMDGPU::SGPRRegBank;
301 return Ty ==
LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
304 return TRI->
isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
307template <
unsigned NumOps>
311 const std::array<unsigned, NumOps> RegSrcOpIdx,
318 unsigned Sizes[NumOps];
319 for (
unsigned I = 0;
I < NumOps; ++
I) {
320 Register Reg =
MI.getOperand(RegSrcOpIdx[
I]).getReg();
324 for (
unsigned I = 0, E =
MI.getNumExplicitDefs();
I != E; ++
I) {
326 Operands[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
330 unsigned MappingID = 2;
331 for (
const auto &Entry : Table) {
332 for (
unsigned I = 0;
I < NumOps; ++
I) {
333 int OpIdx = RegSrcOpIdx[
I];
334 Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[
I],
Sizes[
I]);
349 case Intrinsic::amdgcn_readlane: {
352 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
355 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
358 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
359 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
361 case Intrinsic::amdgcn_writelane: {
364 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
367 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
370 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
373 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
377 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
378 return addMappingFromTable<4>(
MI,
MRI, RegSrcOpIdx, Table);
390 case Intrinsic::amdgcn_s_buffer_load: {
393 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
396 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
399 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
402 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
406 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
407 return addMappingFromTable<2>(
MI,
MRI, RegSrcOpIdx, Table);
409 case Intrinsic::amdgcn_ds_ordered_add:
410 case Intrinsic::amdgcn_ds_ordered_swap: {
414 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
417 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
420 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
421 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
423 case Intrinsic::amdgcn_s_sendmsg:
424 case Intrinsic::amdgcn_s_sendmsghalt: {
428 { { AMDGPU::SGPRRegBankID }, 1 },
431 { { AMDGPU::VGPRRegBankID }, 3 }
434 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
435 return addMappingFromTable<1>(
MI,
MRI, RegSrcOpIdx, Table);
445 if (!
MI.hasOneMemOperand())
478 switch (
MI.getOpcode()) {
479 case TargetOpcode::G_CONSTANT:
480 case TargetOpcode::G_IMPLICIT_DEF: {
484 { { AMDGPU::VGPRRegBankID }, 1 },
485 { { AMDGPU::SGPRRegBankID }, 1 },
486 { { AMDGPU::VCCRegBankID }, 1 }
489 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
494 case TargetOpcode::G_FCONSTANT:
495 case TargetOpcode::G_FRAME_INDEX:
496 case TargetOpcode::G_GLOBAL_VALUE: {
498 { { AMDGPU::VGPRRegBankID }, 1 },
499 { { AMDGPU::SGPRRegBankID }, 1 }
502 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
504 case TargetOpcode::G_AND:
505 case TargetOpcode::G_OR:
506 case TargetOpcode::G_XOR: {
513 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
514 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
515 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
521 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
522 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
523 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size)}),
534 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
535 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
536 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
542 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
543 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
544 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
549 case TargetOpcode::G_LOAD:
550 case TargetOpcode::G_ZEXTLOAD:
551 case TargetOpcode::G_SEXTLOAD: {
553 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
562 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
563 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
571 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
572 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
585 case TargetOpcode::G_SELECT: {
589 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
590 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
591 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
597 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
598 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
599 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
605 case TargetOpcode::G_UADDE:
606 case TargetOpcode::G_USUBE:
607 case TargetOpcode::G_SADDE:
608 case TargetOpcode::G_SSUBE: {
612 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
613 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
614 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
615 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
616 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
622 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
623 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
624 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
625 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
630 case AMDGPU::G_BRCOND: {
631 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
636 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr}),
642 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr }),
647 case AMDGPU::G_INTRINSIC:
648 case AMDGPU::G_INTRINSIC_CONVERGENT:
650 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
651 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
666 Register LoLHS =
MRI->createGenericVirtualRegister(HalfTy);
667 Register HiLHS =
MRI->createGenericVirtualRegister(HalfTy);
669 MRI->setRegBank(LoLHS, *Bank);
670 MRI->setRegBank(HiLHS, *Bank);
675 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
686 MRI.setType(Reg, NewTy);
706 LLT Ty =
MRI.getType(Src);
709 if (Bank == &AMDGPU::SGPRRegBank)
715 if (Bank != &AMDGPU::VGPRRegBank) {
717 Src =
B.buildCopy(Ty, Src).getReg(0);
718 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
722 unsigned NumParts = Bits / 32;
729 auto Unmerge =
B.buildUnmerge(
S32, Src);
730 for (
unsigned i = 0; i < NumParts; ++i)
734 for (
unsigned i = 0; i < NumParts; ++i) {
736 Register DstPart =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
737 MRI.setType(DstPart, NumParts == 1 ? Ty :
S32);
742 assert(Constrained &&
"Failed to constrain readfirstlane src reg");
744 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
752 Register Dst =
B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
753 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
786 const unsigned MovExecOpc =
788 const unsigned MovExecTermOpc =
792 AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
794 AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
796 AMDGPU::EXEC_LO : AMDGPU::EXEC;
799 const int OrigRangeSize = std::distance(Range.begin(), Range.end());
803 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
804 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
807 B.buildInstr(TargetOpcode::IMPLICIT_DEF)
808 .addDef(InitSaveExecReg);
810 Register PhiExec =
MRI.createVirtualRegister(WaveRC);
811 Register NewExec =
MRI.createVirtualRegister(WaveRC);
837 B.setInsertPt(*LoopBB, LoopBB->
end());
839 B.buildInstr(TargetOpcode::PHI)
841 .addReg(InitSaveExecReg)
856 auto NewEnd = BodyBB->
end();
863 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
868 if (!SGPROperandRegs.
count(OldReg))
873 auto OldVal = WaterfalledRegMap.
find(OldReg);
874 if (OldVal != WaterfalledRegMap.
end()) {
875 Op.setReg(OldVal->second);
880 LLT OpTy =
MRI.getType(OpReg);
883 if (OpBank != &AMDGPU::VGPRRegBank) {
886 OpReg =
B.buildCopy(OpTy, OpReg).getReg(0);
887 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
895 bool Is64 = OpSize % 64 == 0;
896 unsigned PartSize = Is64 ? 64 : 32;
898 unsigned NumParts = OpSize / PartSize;
904 CurrentLaneParts.
push_back(CurrentLaneReg);
906 auto UnmergeOp =
B.buildUnmerge(PartTy, OpReg);
907 auto UnmergeCurrentLane =
B.buildUnmerge(PartTy, CurrentLaneReg);
908 for (
unsigned i = 0; i < NumParts; ++i) {
910 CurrentLaneParts.
push_back(UnmergeCurrentLane.getReg(i));
911 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
912 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
916 for (
unsigned i = 0; i < NumParts; ++i) {
918 OpParts[i]).getReg(0);
919 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
924 CondReg =
B.buildAnd(
S1, CondReg, CmpReg).getReg(0);
925 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
929 Op.setReg(CurrentLaneReg);
932 WaterfalledRegMap.
insert(std::pair(OldReg,
Op.getReg()));
937 CondReg =
B.buildIntrinsic(Intrinsic::amdgcn_ballot,
941 MRI.setRegClass(CondReg, WaveRC);
944 B.buildInstr(AndSaveExecOpc)
948 MRI.setSimpleHint(NewExec, CondReg);
950 B.setInsertPt(*BodyBB, BodyBB->
end());
953 B.buildInstr(XorTermOpc)
962 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
969 B.setMBB(*RestoreExecBB);
970 B.buildInstr(MovExecTermOpc)
972 .addReg(SaveExecReg);
976 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
988 for (
unsigned Op : OpIndices) {
992 if (OpBank->
getID() != AMDGPU::SGPRRegBankID)
993 SGPROperandRegs.
insert(Reg);
997 return !SGPROperandRegs.
empty();
1017 Register Reg =
MI.getOperand(OpIdx).getReg();
1020 if (Bank == &AMDGPU::SGPRRegBank)
1024 MI.getOperand(OpIdx).setReg(Reg);
1036 assert(FirstSize % EltSize == 0);
1038 unsigned FirstPartNumElts = FirstSize / EltSize;
1039 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1060 const LLT LoadTy =
MRI.getType(DstReg);
1062 const unsigned MaxNonSmrdLoadSize = 128;
1066 if (DstBank == &AMDGPU::SGPRRegBank) {
1078 if (LoadSize == 32 &&
1082 if (LoadSize == 32 &&
1091 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
1093 if (LoadSize == 32) {
1097 if (
MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
1099 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1100 B.buildSExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1101 }
else if (
MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
1103 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1104 B.buildZExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1107 B.buildLoadFromOffset(
MI.getOperand(0), PtrReg, *MMO, 0);
1121 auto WideLoad =
B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
1123 B.buildTrunc(
MI.getOperand(0), WideLoad);
1125 B.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(),
1131 MI.eraseFromParent();
1136 if (LoadSize <= MaxNonSmrdLoadSize)
1142 if (SrcRegs.
empty())
1145 assert(LoadSize % MaxNonSmrdLoadSize == 0);
1150 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
1151 MRI.setType(BasePtrReg, PtrTy);
1153 unsigned NumSplitParts = LoadTy.
getSizeInBits() / MaxNonSmrdLoadSize;
1154 const LLT LoadSplitTy = LoadTy.
divide(NumSplitParts);
1155 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1166 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
1177 const auto &TFI = *ST.getFrameLowering();
1185 Register AllocSize =
MI.getOperand(1).getReg();
1191 if (SizeBank != &AMDGPU::SGPRRegBank)
1194 LLT PtrTy =
MRI.getType(Dst);
1199 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1201 auto WaveSize =
B.buildConstant(
LLT::scalar(32), ST.getWavefrontSizeLog2());
1202 auto ScaledSize =
B.buildShl(IntPtrTy, AllocSize, WaveSize);
1204 auto SPCopy =
B.buildCopy(PtrTy, SPReg);
1205 if (Alignment > TFI.getStackAlign()) {
1206 auto PtrAdd =
B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
1207 B.buildMaskLowPtrBits(Dst, PtrAdd,
1208 Log2(Alignment) + ST.getWavefrontSizeLog2());
1210 B.buildPtrAdd(Dst, SPCopy, ScaledSize);
1213 MI.eraseFromParent();
1220 int RsrcIdx)
const {
1221 const int NumDefs =
MI.getNumExplicitDefs();
1225 RsrcIdx += NumDefs + 1;
1232 for (
int I = NumDefs, NumOps =
MI.getNumOperands();
I != NumOps; ++
I) {
1233 if (!
MI.getOperand(
I).isReg())
1237 if (
I == RsrcIdx ||
I == RsrcIdx + 1)
1249 Register &SOffsetReg, int64_t &InstOffsetVal,
Align Alignment)
const {
1253 if (std::optional<int64_t> Imm =
1257 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1258 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1259 InstOffsetVal = ImmOffset;
1261 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1262 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1263 return SOffset + ImmOffset;
1278 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1279 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1280 InstOffsetVal = ImmOffset;
1286 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1287 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1289 InstOffsetVal = ImmOffset;
1303 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
1309 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
1319 VOffsetReg = CombinedOffset;
1321 VOffsetReg =
B.buildCopy(
S32, CombinedOffset).getReg(0);
1322 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1325 SOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1326 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1337 LLT Ty =
MRI.getType(Dst);
1343 if (RSrcBank == &AMDGPU::SGPRRegBank &&
1344 OffsetBank == &AMDGPU::SGPRRegBank)
1352 if (LoadSize == 256 || LoadSize == 512) {
1353 NumLoads = LoadSize / 128;
1354 Ty = Ty.
divide(NumLoads);
1359 const Align Alignment = NumLoads > 1 ?
Align(16 * NumLoads) :
Align(1);
1365 int64_t ImmOffset = 0;
1368 SOffset, ImmOffset, Alignment);
1373 const Align MemAlign(4);
1387 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
1394 for (
int i = 0; i < NumLoads; ++i) {
1395 if (NumLoads == 1) {
1398 LoadParts[i] =
MRI.createGenericVirtualRegister(Ty);
1399 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
1406 B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
1407 .addDef(LoadParts[i])
1412 .addImm(ImmOffset + 16 * i)
1415 .addMemOperand(MMO);
1421 if (RSrcBank != &AMDGPU::SGPRRegBank) {
1424 B.setInstr(*Span.
begin());
1425 MI.eraseFromParent();
1429 OpsToWaterfall.
insert(RSrc);
1434 if (NumLoads != 1) {
1436 B.buildConcatVectors(Dst, LoadParts);
1438 B.buildMergeLikeInstr(Dst, LoadParts);
1442 if (RSrcBank == &AMDGPU::SGPRRegBank)
1443 MI.eraseFromParent();
1458 LLT Ty =
MRI.getType(DstReg);
1462 unsigned FirstOpnd = isa<GIntrinsic>(
MI) ? 2 : 1;
1463 Register SrcReg =
MI.getOperand(FirstOpnd).getReg();
1464 Register OffsetReg =
MI.getOperand(FirstOpnd + 1).getReg();
1465 Register WidthReg =
MI.getOperand(FirstOpnd + 2).getReg();
1469 if (DstBank == &AMDGPU::VGPRRegBank) {
1475 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1479 auto ShiftOffset =
Signed ?
B.buildAShr(
S64, SrcReg, OffsetReg)
1480 :
B.buildLShr(
S64, SrcReg, OffsetReg);
1481 auto UnmergeSOffset =
B.buildUnmerge({
S32,
S32}, ShiftOffset);
1488 auto Zero =
B.buildConstant(
S32, 0);
1489 auto WidthImm = ConstWidth->Value.getZExtValue();
1490 if (WidthImm <= 32) {
1494 Signed ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
1495 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
1497 Signed ?
B.buildAShr(
S32, Extract,
B.buildConstant(
S32, 31)) : Zero;
1498 B.buildMergeLikeInstr(DstReg, {Extract, Extend});
1502 auto UpperWidth =
B.buildConstant(
S32, WidthImm - 32);
1505 ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
1506 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
1507 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
1509 MI.eraseFromParent();
1515 auto ExtShift =
B.buildSub(
S32,
B.buildConstant(
S32, 64), WidthReg);
1516 auto SignBit =
B.buildShl(
S64, ShiftOffset, ExtShift);
1518 B.buildAShr(
S64, SignBit, ExtShift);
1520 B.buildLShr(
S64, SignBit, ExtShift);
1521 MI.eraseFromParent();
1527 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1530 auto OffsetMask =
B.buildConstant(
S32, maskTrailingOnes<unsigned>(6));
1531 auto ClampOffset =
B.buildAnd(
S32, OffsetReg, OffsetMask);
1534 auto ShiftWidth =
B.buildShl(
S32, WidthReg,
B.buildConstant(
S32, 16));
1539 auto MergedInputs =
B.buildOr(
S32, ClampOffset, ShiftWidth);
1543 unsigned Opc = Ty ==
S32 ? (
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1544 (
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1546 auto MIB =
B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1550 MI.eraseFromParent();
1568 if (
MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
1571 bool IsUnsigned =
MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
1575 bool DstOnValu =
MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
1576 bool Accumulate =
true;
1585 Register DstLo =
B.buildMul(
S32, Src0, Src1).getReg(0);
1586 bool MulHiInVgpr =
false;
1588 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
1591 DstHi = IsUnsigned ?
B.buildUMulH(
S32, Src0, Src1).getReg(0)
1592 :
B.buildSMulH(
S32, Src0, Src1).getReg(0);
1593 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
1598 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
1599 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
1601 DstHi = IsUnsigned ?
B.buildUMulH(
S32, VSrc0, VSrc1).getReg(0)
1602 :
B.buildSMulH(
S32, VSrc0, VSrc1).getReg(0);
1603 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1619 LLT CarryType = DstOnValu ?
S1 :
S32;
1621 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
1623 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
1628 Zero =
B.buildConstant(
S32, 0).getReg(0);
1629 MRI.setRegBank(Zero,
1630 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
1634 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
1635 : AMDGPU::SGPRRegBank);
1637 if (DstOnValu && !MulHiInVgpr) {
1638 Carry =
B.buildTrunc(
S1, Carry).getReg(0);
1639 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
1645 DstLo =
B.buildCopy(
S32, DstLo).getReg(0);
1646 DstHi =
B.buildCopy(
S32, DstHi).getReg(0);
1647 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
1648 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1651 auto Unmerge =
B.buildUnmerge(
S32, Src2);
1652 Register Src2Lo = Unmerge.getReg(0);
1653 Register Src2Hi = Unmerge.getReg(1);
1654 MRI.setRegBank(Src2Lo, DstBank);
1655 MRI.setRegBank(Src2Hi, DstBank);
1659 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
1661 Carry =
B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
1662 MRI.setRegBank(Carry, CarryBank);
1665 auto AddLo =
B.buildUAddo(
S32, CarryType, DstLo, Src2Lo);
1666 DstLo = AddLo.getReg(0);
1667 Register CarryLo = AddLo.getReg(1);
1668 MRI.setRegBank(DstLo, DstBank);
1669 MRI.setRegBank(CarryLo, CarryBank);
1671 auto AddHi =
B.buildUAdde(
S32, CarryType, DstHi, Src2Hi, CarryLo);
1672 DstHi = AddHi.getReg(0);
1673 MRI.setRegBank(DstHi, DstBank);
1675 Register CarryHi = AddHi.getReg(1);
1676 MRI.setRegBank(CarryHi, CarryBank);
1681 Carry =
B.buildXor(CarryType, Carry, CarryHi).getReg(0);
1682 MRI.setRegBank(Carry, CarryBank);
1686 Carry =
B.buildConstant(CarryType, 0).getReg(0);
1687 MRI.setRegBank(Carry, CarryBank);
1691 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
1694 B.buildCopy(Dst1, Carry);
1696 B.buildTrunc(Dst1, Carry);
1699 MI.eraseFromParent();
1706 case TargetOpcode::G_ASHR:
1707 case TargetOpcode::G_SMIN:
1708 case TargetOpcode::G_SMAX:
1709 return TargetOpcode::G_SEXT;
1710 case TargetOpcode::G_LSHR:
1711 case TargetOpcode::G_UMIN:
1712 case TargetOpcode::G_UMAX:
1713 return TargetOpcode::G_ZEXT;
1715 return TargetOpcode::G_ANYEXT;
1721static std::pair<Register, Register>
1724 auto Bitcast =
B.buildBitcast(
S32, Src);
1726 if (ExtOpcode == TargetOpcode::G_SEXT) {
1727 auto ExtLo =
B.buildSExtInReg(
S32, Bitcast, 16);
1728 auto ShiftHi =
B.buildAShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1729 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1732 auto ShiftHi =
B.buildLShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1733 if (ExtOpcode == TargetOpcode::G_ZEXT) {
1734 auto ExtLo =
B.buildAnd(
S32, Bitcast,
B.buildConstant(
S32, 0xffff));
1735 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1738 assert(ExtOpcode == TargetOpcode::G_ANYEXT);
1739 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
1747 if (!SrcReg.
empty()) {
1764 LLT StoreVT =
MRI.getType(Reg);
1768 auto Unmerge =
B.buildUnmerge(
S16, Reg);
1772 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
1782static std::pair<Register, unsigned>
1786 return std::pair(
Register(), Const);
1790 return std::pair(
Base, Const);
1793 return std::pair(Reg, 0);
1796std::pair<Register, unsigned>
1809 if (ImmOffset != 0) {
1818 unsigned Overflow = ImmOffset & ~MaxImm;
1819 ImmOffset -= Overflow;
1820 if ((int32_t)Overflow < 0) {
1821 Overflow += ImmOffset;
1826 if (Overflow != 0) {
1828 BaseReg =
B.buildConstant(
S32, Overflow).getReg(0);
1830 auto OverflowVal =
B.buildConstant(
S32, Overflow);
1831 BaseReg =
B.buildAdd(
S32, BaseReg, OverflowVal).getReg(0);
1837 BaseReg =
B.buildConstant(
S32, 0).getReg(0);
1839 return {BaseReg, C1};
1845 LLT SrcTy =
MRI.getType(SrcReg);
1848 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1855 Register TmpReg0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1856 Register TmpReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1858 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1860 .addUse(SrcReg, 0, AMDGPU::sub0);
1861 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1863 .addUse(SrcReg, 0, AMDGPU::sub1);
1864 B.buildInstr(AMDGPU::REG_SEQUENCE)
1867 .addImm(AMDGPU::sub0)
1869 .addImm(AMDGPU::sub1);
1880 unsigned ConstOffset) {
1886 auto MaterializedOffset =
B.buildConstant(
S32, ConstOffset);
1888 auto Add =
B.buildAdd(
S32, WaterfallIdx, MaterializedOffset);
1889 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
1890 MRI.setRegBank(
Add.getReg(0), AMDGPU::SGPRRegBank);
1902 bool IsBooleanSrc =
false) {
1903 if (ExtOpc == AMDGPU::G_ZEXT) {
1904 B.buildConstant(Hi32Reg, 0);
1905 }
else if (ExtOpc == AMDGPU::G_SEXT) {
1909 B.buildCopy(Hi32Reg, Lo32Reg);
1913 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
1914 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
1917 assert(ExtOpc == AMDGPU::G_ANYEXT &&
"not an integer extension");
1918 B.buildUndef(Hi32Reg);
1922bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
1924 const OperandsMapper &OpdMapper)
const {
1931 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
1933 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
1935 LLT VecTy =
MRI.getType(VecReg);
1946 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
1948 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1951 (DstBank == AMDGPU::SGPRRegBank &&
1952 SrcBank == AMDGPU::SGPRRegBank &&
1953 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
1954 : AMDGPU::VCCRegBank;
1957 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
1958 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
1959 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
1964 unsigned NumLanes = DstRegs.size();
1968 EltTy =
MRI.getType(DstRegs[0]);
1970 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
1972 for (
unsigned L = 0;
L < NumLanes; ++
L)
1973 Res[L] = UnmergeToEltTy.getReg(L);
1975 for (
unsigned I = 1;
I < NumElem; ++
I) {
1976 auto IC =
B.buildConstant(
S32,
I);
1977 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
1979 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
1981 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1982 auto S =
B.buildSelect(EltTy, Cmp,
1983 UnmergeToEltTy.getReg(
I * NumLanes + L), Res[L]);
1985 for (
unsigned N : { 0, 2, 3 })
1986 MRI.setRegBank(S->getOperand(
N).getReg(), DstBank);
1988 Res[
L] = S->getOperand(0).getReg();
1992 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1993 Register DstReg = (NumLanes == 1) ?
MI.getOperand(0).getReg() : DstRegs[
L];
1994 B.buildCopy(DstReg, Res[L]);
1995 MRI.setRegBank(DstReg, DstBank);
1998 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
1999 MI.eraseFromParent();
2010 if (CurrBank && *CurrBank != Bank) {
2011 Register Copy =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
2012 MRI.setRegBank(Copy, Bank);
2016 MRI.setRegBank(Reg, Bank);
2020bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
2022 const OperandsMapper &OpdMapper)
const {
2029 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2031 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
2033 LLT VecTy =
MRI.getType(VecReg);
2044 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2046 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
2048 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
2051 (DstBank == AMDGPU::SGPRRegBank &&
2052 SrcBank == AMDGPU::SGPRRegBank &&
2053 InsBank == AMDGPU::SGPRRegBank &&
2054 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
2055 : AMDGPU::VCCRegBank;
2058 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
2059 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
2060 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
2065 unsigned NumLanes = InsRegs.size();
2068 InsRegs.push_back(
MI.getOperand(2).getReg());
2070 EltTy =
MRI.getType(InsRegs[0]);
2073 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
2076 for (
unsigned I = 0;
I < NumElem; ++
I) {
2077 auto IC =
B.buildConstant(
S32,
I);
2078 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2080 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2082 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2084 Register Op1 = UnmergeToEltTy.getReg(
I * NumLanes + L);
2095 if (MergeTy ==
MRI.getType(
MI.getOperand(0).getReg())) {
2096 B.buildBuildVector(
MI.getOperand(0), Ops);
2098 auto Vec =
B.buildBuildVector(MergeTy, Ops);
2099 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
2100 B.buildBitcast(
MI.getOperand(0).getReg(), Vec);
2103 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2104 MI.eraseFromParent();
2117 if (DefRegs.
empty()) {
2125 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2136 if (Src0Regs.
empty())
2141 if (Src1Regs.
empty())
2164 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2166 Register Hi =
B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
2167 Register MulLoHi =
B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);
2169 Register MulHiLo =
B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
2170 B.buildAdd(DefRegs[1],
Add, MulHiLo);
2171 B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);
2173 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2174 MI.eraseFromParent();
2180 B.setInstrAndDebugLoc(
MI);
2181 unsigned Opc =
MI.getOpcode();
2184 case AMDGPU::G_CONSTANT:
2185 case AMDGPU::G_IMPLICIT_DEF: {
2187 LLT DstTy =
MRI.getType(DstReg);
2193 if (DstBank == &AMDGPU::VCCRegBank)
2196 if (DefRegs.
empty())
2199 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2202 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2204 MI.getOperand(0).setReg(NewDstReg);
2205 if (Opc != AMDGPU::G_IMPLICIT_DEF) {
2206 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2207 MI.getOperand(1).setCImm(
2211 MRI.setRegBank(NewDstReg, *DstBank);
2212 B.buildTrunc(DefRegs[0], NewDstReg);
2215 case AMDGPU::G_PHI: {
2217 LLT DstTy =
MRI.getType(DstReg);
2224 if (DstBank == &AMDGPU::VCCRegBank) {
2231 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
2235 if (SrcBank != &AMDGPU::VCCRegBank) {
2240 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2241 MI.getOperand(
I).setReg(Copy.getReg(0));
2252 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2253 B.setInsertPt(
B.getMBB(),
MI);
2261 case AMDGPU::G_FCMP:
2265 case AMDGPU::G_ICMP:
2266 case AMDGPU::G_UADDO:
2267 case AMDGPU::G_USUBO:
2268 case AMDGPU::G_UADDE:
2269 case AMDGPU::G_SADDE:
2270 case AMDGPU::G_USUBE:
2271 case AMDGPU::G_SSUBE: {
2272 unsigned BoolDstOp =
2273 (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
2274 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2278 if (DstBank != &AMDGPU::SGPRRegBank)
2281 const bool HasCarryIn =
MI.getNumOperands() == 5;
2287 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2288 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2292 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2293 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2294 MI.getOperand(4).setReg(NewSrcReg);
2298 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2303 if (DefRegs.
empty())
2305 B.buildTrunc(DefRegs[0], NewDstReg);
2308 case AMDGPU::G_SELECT: {
2310 LLT DstTy =
MRI.getType(DstReg);
2313 if (CondRegs.
empty())
2320 if (CondBank == &AMDGPU::SGPRRegBank) {
2323 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2325 MI.getOperand(1).setReg(NewCondReg);
2326 B.buildZExt(NewCondReg, CondRegs[0]);
2339 if (DefRegs.
empty()) {
2344 if (Src1Regs.
empty())
2350 if (Src2Regs.
empty())
2357 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0]);
2358 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1]);
2360 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2361 MI.eraseFromParent();
2364 case AMDGPU::G_BRCOND: {
2365 Register CondReg =
MI.getOperand(0).getReg();
2370 if (CondBank == &AMDGPU::SGPRRegBank) {
2373 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2375 MI.getOperand(0).setReg(NewCondReg);
2376 B.buildZExt(NewCondReg, CondReg);
2384 case AMDGPU::G_XOR: {
2388 LLT DstTy =
MRI.getType(DstReg);
2393 if (DstBank == &AMDGPU::VCCRegBank)
2397 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2415 if (DefRegs.
empty()) {
2422 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2428 if (Src0Regs.
empty())
2433 if (Src1Regs.
empty())
2440 B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
2441 B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});
2443 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2444 MI.eraseFromParent();
2447 case AMDGPU::G_ABS: {
2453 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2455 ApplyRegBankMapping Apply(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2468 case AMDGPU::G_LSHR:
2469 case AMDGPU::G_ASHR:
2470 case AMDGPU::G_SMIN:
2471 case AMDGPU::G_SMAX:
2472 case AMDGPU::G_UMIN:
2473 case AMDGPU::G_UMAX: {
2475 LLT DstTy =
MRI.getType(DstReg);
2492 if (DstBank == &AMDGPU::VGPRRegBank)
2498 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2500 if (DstTy.
isVector() && Opc == AMDGPU::G_ABS) {
2503 std::tie(WideSrcLo, WideSrcHi) =
2505 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcLo});
2506 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcHi});
2507 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2508 MI.eraseFromParent();
2517 std::tie(WideSrc0Lo, WideSrc0Hi)
2519 std::tie(WideSrc1Lo, WideSrc1Hi)
2521 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2522 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2523 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2524 MI.eraseFromParent();
2532 if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
2533 Opc == AMDGPU::G_ASHR) {
2534 B.setInsertPt(*
MBB,
MI.getIterator());
2542 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
2543 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
2557 Register SrcReg0 =
MI.getOperand(1).getReg();
2558 Register SrcReg1 =
MI.getOperand(2).getReg();
2561 assert(
MRI.getType(DstReg) ==
S64 &&
"This is a special case for s_mul_u64 "
2562 "that handles only 64-bit operands.");
2568 if (DstBank == &AMDGPU::SGPRRegBank) {
2569 MI.setDesc(
TII->get(AMDGPU::S_MUL_U64));
2570 MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
2571 MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
2572 MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
2578 assert(
MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
2579 "The destination operand should be in vector registers.");
2584 Register Op0L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2585 MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
2587 B.buildTrunc(Op0L, SrcReg0);
2590 Register Op1L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2591 MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
2593 B.buildTrunc(Op1L, SrcReg1);
2595 unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
2596 ? AMDGPU::G_AMDGPU_MAD_U64_U32
2597 : AMDGPU::G_AMDGPU_MAD_I64_I32;
2601 MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
2602 Register CarryOut =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2603 MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
2604 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
2605 MI.eraseFromParent();
2608 case AMDGPU::G_SEXT_INREG: {
2610 if (SrcRegs.
empty())
2614 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2621 int Amt =
MI.getOperand(2).getImm();
2627 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2629 auto Freeze =
B.buildFreeze(
S32, SrcRegs[0]);
2631 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2634 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(
S32, 31));
2638 B.buildCopy(DstRegs[0], SrcRegs[0]);
2639 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2643 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2644 MI.eraseFromParent();
2647 case AMDGPU::G_CTPOP:
2648 case AMDGPU::G_BITREVERSE: {
2651 if (DstBank == &AMDGPU::SGPRRegBank)
2656 LLT Ty =
MRI.getType(SrcReg);
2660 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2669 case AMDGPU::G_AMDGPU_FFBH_U32:
2670 case AMDGPU::G_AMDGPU_FFBL_B32:
2671 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2672 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2675 if (DstBank == &AMDGPU::SGPRRegBank)
2680 LLT Ty =
MRI.getType(SrcReg);
2690 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2692 unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2693 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2694 : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2695 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2697 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2698 auto X =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx]});
2699 auto Y =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx ^ 1]});
2701 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2703 : AMDGPU::G_UADDSAT;
2704 Y =
B.buildInstr(AddOpc, {
S32}, {
Y,
B.buildConstant(
S32, 32)});
2706 B.buildUMin(DstReg,
X,
Y);
2707 MI.eraseFromParent();
2710 case AMDGPU::G_SEXT:
2711 case AMDGPU::G_ZEXT:
2712 case AMDGPU::G_ANYEXT: {
2714 LLT SrcTy =
MRI.getType(SrcReg);
2715 const bool Signed = Opc == AMDGPU::G_SEXT;
2723 LLT DstTy =
MRI.getType(DstReg);
2725 SrcBank != &AMDGPU::SGPRRegBank &&
2726 SrcBank != &AMDGPU::VCCRegBank &&
2736 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2737 }
else if (Opc == AMDGPU::G_ZEXT) {
2738 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2740 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2744 MRI.setRegBank(DstReg, *SrcBank);
2745 MI.eraseFromParent();
2755 if (SrcBank == &AMDGPU::VCCRegBank) {
2762 const bool UseSel64 = DstSize > 32 &&
2763 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2767 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2768 auto False =
B.buildConstant(SelType, 0);
2770 MRI.setRegBank(True.getReg(0), *DstBank);
2771 MRI.setRegBank(False.getReg(0), *DstBank);
2772 MRI.setRegBank(DstReg, *DstBank);
2775 B.buildSelect(DefRegs[0], SrcReg, True, False);
2777 }
else if (DstSize < 32) {
2778 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2779 MRI.setRegBank(Sel.getReg(0), *DstBank);
2780 B.buildTrunc(DstReg, Sel);
2782 B.buildSelect(DstReg, SrcReg, True, False);
2785 MI.eraseFromParent();
2791 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2800 LLT DstTy =
MRI.getType(DstReg);
2801 LLT SrcTy =
MRI.getType(SrcReg);
2803 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2815 unsigned ConstOffset;
2816 std::tie(BaseIdxReg, ConstOffset) =
2823 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2828 if (ShouldMoveIndexIntoLoop)
2829 MI.getOperand(2).setReg(BaseIdxReg);
2835 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2836 SrcBank == &AMDGPU::SGPRRegBank;
2837 if (DstRegs.
empty()) {
2842 if (NeedCopyToVGPR) {
2844 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2845 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2846 MI.getOperand(0).setReg(TmpReg);
2847 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2854 if (ShouldMoveIndexIntoLoop)
2864 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2865 auto One =
B.buildConstant(
S32, 1);
2876 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2877 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2879 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2880 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2882 MRI.setRegBank(DstReg, *DstBank);
2883 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2884 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2885 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2886 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2890 MI.eraseFromParent();
2896 B.setInstr(*Span.
begin());
2897 MI.eraseFromParent();
2901 if (NeedCopyToVGPR) {
2905 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2906 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2908 Extract0->getOperand(0).setReg(TmpReg0);
2909 Extract1->getOperand(0).setReg(TmpReg1);
2917 if (ShouldMoveIndexIntoLoop)
2922 case AMDGPU::G_INSERT_VECTOR_ELT: {
2926 LLT VecTy =
MRI.getType(DstReg);
2932 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2934 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2942 LLT InsTy =
MRI.getType(InsReg);
2946 unsigned ConstOffset;
2947 std::tie(BaseIdxReg, ConstOffset) =
2954 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2959 if (ShouldMoveIndexIntoLoop)
2960 MI.getOperand(3).setReg(BaseIdxReg);
2963 if (InsRegs.
empty()) {
2967 if (ShouldMoveIndexIntoLoop) {
2979 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2980 auto One =
B.buildConstant(
S32, 1);
2989 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2990 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2992 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
2993 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3002 MRI.setRegBank(InsReg, *InsSrcBank);
3003 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
3004 MRI.setRegBank(InsLo.getReg(0), *DstBank);
3005 MRI.setRegBank(InsHi.getReg(0), *DstBank);
3006 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3007 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3008 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3013 B.setInsertPt(
B.getMBB(),
MI);
3014 B.buildBitcast(DstReg, InsHi);
3015 MI.eraseFromParent();
3019 B.setInstr(*Span.
begin());
3020 MI.eraseFromParent();
3031 B.buildBitcast(DstReg, InsHi);
3034 if (ShouldMoveIndexIntoLoop)
3039 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3040 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3041 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3042 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3043 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3044 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3045 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3046 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3047 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3048 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3049 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3050 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3051 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3052 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3053 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3054 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3055 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3060 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3061 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3062 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3063 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3064 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3065 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3066 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3067 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3068 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3069 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3070 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3071 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
3076 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3077 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16:
3078 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3079 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3084 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3089 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3090 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3091 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3092 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3093 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3097 case AMDGPU::G_INTRINSIC:
3098 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3100 case Intrinsic::amdgcn_readlane: {
3111 case Intrinsic::amdgcn_writelane: {
3121 case Intrinsic::amdgcn_interp_p1:
3122 case Intrinsic::amdgcn_interp_p2:
3123 case Intrinsic::amdgcn_interp_mov:
3124 case Intrinsic::amdgcn_interp_p1_f16:
3125 case Intrinsic::amdgcn_interp_p2_f16:
3126 case Intrinsic::amdgcn_lds_param_load: {
3134 case Intrinsic::amdgcn_interp_inreg_p10:
3135 case Intrinsic::amdgcn_interp_inreg_p2:
3136 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3137 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3138 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3139 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3142 case Intrinsic::amdgcn_permlane16:
3143 case Intrinsic::amdgcn_permlanex16: {
3151 case Intrinsic::amdgcn_sbfe:
3154 case Intrinsic::amdgcn_ubfe:
3157 case Intrinsic::amdgcn_inverse_ballot:
3158 case Intrinsic::amdgcn_s_bitreplicate:
3159 case Intrinsic::amdgcn_s_quadmask:
3160 case Intrinsic::amdgcn_s_wqm:
3164 case Intrinsic::amdgcn_ballot:
3170 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3171 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3172 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3173 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3183 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
3184 unsigned N =
MI.getNumExplicitOperands() - 2;
3189 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3190 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3191 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
3193 case Intrinsic::amdgcn_ds_ordered_add:
3194 case Intrinsic::amdgcn_ds_ordered_swap: {
3201 case Intrinsic::amdgcn_ds_gws_init:
3202 case Intrinsic::amdgcn_ds_gws_barrier:
3203 case Intrinsic::amdgcn_ds_gws_sema_br: {
3209 case Intrinsic::amdgcn_ds_gws_sema_v:
3210 case Intrinsic::amdgcn_ds_gws_sema_p:
3211 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3216 case Intrinsic::amdgcn_ds_append:
3217 case Intrinsic::amdgcn_ds_consume: {
3221 case Intrinsic::amdgcn_s_sendmsg:
3222 case Intrinsic::amdgcn_s_sendmsghalt: {
3227 case Intrinsic::amdgcn_s_setreg: {
3231 case Intrinsic::amdgcn_s_ttracedata:
3234 case Intrinsic::amdgcn_raw_buffer_load_lds:
3235 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3242 case Intrinsic::amdgcn_struct_buffer_load_lds:
3243 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3250 case Intrinsic::amdgcn_global_load_lds: {
3255 case Intrinsic::amdgcn_lds_direct_load: {
3261 case Intrinsic::amdgcn_exp_row:
3265 case Intrinsic::amdgcn_s_sleep_var:
3269 case Intrinsic::amdgcn_s_barrier_signal_var:
3270 case Intrinsic::amdgcn_s_barrier_join:
3271 case Intrinsic::amdgcn_s_wakeup_barrier:
3274 case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
3277 case Intrinsic::amdgcn_s_barrier_init:
3281 case Intrinsic::amdgcn_s_get_barrier_state: {
3291 if (RSrcIntrin->IsImage) {
3302 case AMDGPU::G_SI_CALL: {
3313 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3314 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3320 unsigned NonCopyInstrsLen = 0;
3326 while (Start->getOpcode() != FrameSetupOpcode) {
3328 bool IsCopy =
false;
3329 if (Start->getOpcode() == AMDGPU::COPY) {
3330 auto &Dst = Start->getOperand(0);
3333 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3338 auto &Src = Start->getOperand(1);
3341 IsCopy =
Info->getScratchRSrcReg() == Reg;
3349 NonCopyInstrsLen = NonCopyInstrs.
size();
3354 NonCopyInstrs.
resize(NonCopyInstrsLen);
3356 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3362 NonCopyInstrs.
clear();
3363 NonCopyInstrsLen = 0;
3366 while (
End->getOpcode() != FrameDestroyOpcode) {
3368 bool IsCopy =
false;
3369 if (
End->getOpcode() == AMDGPU::COPY) {
3370 auto &Src =
End->getOperand(1);
3373 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3379 NonCopyInstrsLen = NonCopyInstrs.
size();
3384 NonCopyInstrs.
resize(NonCopyInstrsLen);
3388 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3393 B.setInsertPt(
B.getMBB(), Start);
3397 case AMDGPU::G_LOAD:
3398 case AMDGPU::G_ZEXTLOAD:
3399 case AMDGPU::G_SEXTLOAD: {
3404 case AMDGPU::G_DYN_STACKALLOC:
3407 case AMDGPU::G_STACKRESTORE: {
3412 case AMDGPU::G_SBFX:
3415 case AMDGPU::G_UBFX:
3418 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3419 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3422 case AMDGPU::G_PREFETCH: {
3424 MI.eraseFromParent();
3429 if (PtrBank == AMDGPU::VGPRRegBankID) {
3430 MI.eraseFromParent();
3433 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3436 MI.eraseFromParent();
3454 if (RB0 == AMDGPU::InvalidRegBankID)
3456 if (RB1 == AMDGPU::InvalidRegBankID)
3459 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3460 return AMDGPU::SGPRRegBankID;
3462 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3463 return AMDGPU::AGPRRegBankID;
3465 return AMDGPU::VGPRRegBankID;
3469 if (RB0 == AMDGPU::InvalidRegBankID)
3471 if (RB1 == AMDGPU::InvalidRegBankID)
3477 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3478 return AMDGPU::VCCRegBankID;
3486 unsigned RegBank = AMDGPU::InvalidRegBankID;
3494 if (RegBank == AMDGPU::VGPRRegBankID)
3510 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3523 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3529 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3532 MI.getNumOperands());
3545 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3551 unsigned BankID =
Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3552 OpdsMapping[i] = AMDGPU::getValueMapping(BankID,
Size);
3556 MI.getNumOperands());
3565 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
3571 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3575 MI.getNumOperands());
3581 int RsrcIdx)
const {
3584 RsrcIdx +=
MI.getNumExplicitDefs() + 1;
3586 const int NumOps =
MI.getNumOperands();
3591 for (
int I = 0;
I != NumOps; ++
I) {
3592 if (!
MI.getOperand(
I).isReg())
3606 const bool MustBeSGPR =
I == RsrcIdx ||
I == RsrcIdx + 1;
3611 OpdsMapping[
I] = AMDGPU::getValueMapping(NewBank,
Size);
3614 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3625 LLT PtrTy =
MRI.getType(PtrReg);
3629 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3634 return AMDGPU::getValueMapping(PtrBank->
getID(),
Size);
3645 LLT PtrTy =
MRI.getType(PtrReg);
3657 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3658 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3660 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3665 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3667 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3670 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3671 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3674 OpdsMapping[0] = ValMapping;
3675 OpdsMapping[1] = PtrMapping;
3700 return AMDGPU::getValueMapping(Bank,
Size);
3708 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3716 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID,
Size);
3733 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3740 assert(SrcBank &&
"src bank should have been assigned already");
3745 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3750 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3752 OpdsMapping[0] = &ValMap;
3753 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3754 OpdsMapping[1] = &ValMap;
3761 if (
MI.isRegSequence()) {
3764 unsigned BankID = AMDGPU::SGPRRegBankID;
3766 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
3770 if (OpBank != AMDGPU::SGPRRegBankID) {
3771 BankID = AMDGPU::VGPRRegBankID;
3787 if (
auto *
PHI = dyn_cast<GPhi>(&
MI)) {
3788 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3793 ResultBank = DstBank->
getID();
3795 for (
unsigned I = 0;
I <
PHI->getNumIncomingValues(); ++
I) {
3800 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3801 ResultBank = AMDGPU::VGPRRegBankID;
3806 unsigned OpBank = Bank->
getID();
3810 assert(ResultBank != AMDGPU::InvalidRegBankID);
3812 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3827 switch (
MI.getOpcode()) {
3834 case AMDGPU::G_MUL: {
3835 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3840 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3841 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3842 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3844 TargetBankID = DstBank->
getID();
3845 if (DstBank == &AMDGPU::VCCRegBank) {
3846 TargetBankID = AMDGPU::VCCRegBankID;
3847 BankLHS = AMDGPU::VCCRegBankID;
3848 BankRHS = AMDGPU::VCCRegBankID;
3851 AMDGPU::SGPRRegBankID);
3853 AMDGPU::SGPRRegBankID);
3857 AMDGPU::VCCRegBankID);
3859 AMDGPU::VCCRegBankID);
3862 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3863 TargetBankID = AMDGPU::VGPRRegBankID;
3864 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3865 TargetBankID = AMDGPU::VCCRegBankID;
3866 BankLHS = AMDGPU::VCCRegBankID;
3867 BankRHS = AMDGPU::VCCRegBankID;
3868 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3869 TargetBankID = AMDGPU::SGPRRegBankID;
3873 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3874 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3875 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
3882 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
3883 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
3885 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
3887 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
3890 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
3898 case AMDGPU::G_PTR_ADD:
3899 case AMDGPU::G_PTRMASK:
3903 case AMDGPU::G_LSHR:
3904 case AMDGPU::G_ASHR:
3905 case AMDGPU::G_UADDO:
3906 case AMDGPU::G_USUBO:
3907 case AMDGPU::G_UADDE:
3908 case AMDGPU::G_SADDE:
3909 case AMDGPU::G_USUBE:
3910 case AMDGPU::G_SSUBE:
3911 case AMDGPU::G_SMIN:
3912 case AMDGPU::G_SMAX:
3913 case AMDGPU::G_UMIN:
3914 case AMDGPU::G_UMAX:
3916 case AMDGPU::G_SHUFFLE_VECTOR:
3917 case AMDGPU::G_SBFX:
3918 case AMDGPU::G_UBFX:
3919 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
3920 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
3924 case AMDGPU::G_FADD:
3925 case AMDGPU::G_FSUB:
3926 case AMDGPU::G_FMUL:
3928 case AMDGPU::G_FFLOOR:
3929 case AMDGPU::G_FCEIL:
3930 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
3931 case AMDGPU::G_FMINNUM:
3932 case AMDGPU::G_FMAXNUM:
3933 case AMDGPU::G_FMINIMUM:
3934 case AMDGPU::G_FMAXIMUM:
3935 case AMDGPU::G_INTRINSIC_TRUNC:
3936 case AMDGPU::G_STRICT_FADD:
3937 case AMDGPU::G_STRICT_FSUB:
3938 case AMDGPU::G_STRICT_FMUL:
3939 case AMDGPU::G_STRICT_FMA: {
3940 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
3947 case AMDGPU::G_FPTOSI:
3948 case AMDGPU::G_FPTOUI:
3949 case AMDGPU::G_SITOFP:
3950 case AMDGPU::G_UITOFP: {
3951 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3952 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3958 case AMDGPU::G_FPTRUNC:
3959 case AMDGPU::G_FPEXT: {
3960 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3961 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3967 case AMDGPU::G_FSQRT:
3968 case AMDGPU::G_FEXP2:
3969 case AMDGPU::G_FLOG2: {
3970 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3976 case AMDGPU::G_SADDSAT:
3977 case AMDGPU::G_SSUBSAT:
3978 case AMDGPU::G_UADDSAT:
3979 case AMDGPU::G_USUBSAT:
3980 case AMDGPU::G_FMAD:
3981 case AMDGPU::G_FLDEXP:
3982 case AMDGPU::G_FMINNUM_IEEE:
3983 case AMDGPU::G_FMAXNUM_IEEE:
3984 case AMDGPU::G_FCANONICALIZE:
3985 case AMDGPU::G_STRICT_FLDEXP:
3986 case AMDGPU::G_BSWAP:
3987 case AMDGPU::G_FSHR:
3988 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
3989 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
3990 case AMDGPU::G_AMDGPU_RCP_IFLAG:
3991 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
3992 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
3993 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
3994 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
3995 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
3996 case AMDGPU::G_AMDGPU_SMED3:
3997 case AMDGPU::G_AMDGPU_FMED3:
3999 case AMDGPU::G_UMULH:
4000 case AMDGPU::G_SMULH: {
4005 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4006 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4015 bool AllSalu =
true;
4016 bool MulSalu =
true;
4017 for (
unsigned i = 0; i < 5; ++i) {
4020 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4022 if (i == 2 || i == 3) {
4040 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4041 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4042 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4043 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4044 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4047 case AMDGPU::G_IMPLICIT_DEF: {
4048 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4049 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4052 case AMDGPU::G_FCONSTANT:
4053 case AMDGPU::G_CONSTANT:
4054 case AMDGPU::G_GLOBAL_VALUE:
4055 case AMDGPU::G_BLOCK_ADDR:
4056 case AMDGPU::G_READSTEADYCOUNTER:
4057 case AMDGPU::G_READCYCLECOUNTER: {
4058 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4059 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4062 case AMDGPU::G_FRAME_INDEX: {
4065 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4066 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4069 case AMDGPU::G_DYN_STACKALLOC: {
4071 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4073 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4076 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4081 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4082 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4085 case AMDGPU::G_INSERT: {
4090 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4091 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4092 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4093 OpdsMapping[3] =
nullptr;
4096 case AMDGPU::G_EXTRACT: {
4100 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4101 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4102 OpdsMapping[2] =
nullptr;
4105 case AMDGPU::G_BUILD_VECTOR:
4106 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4107 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
4110 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4113 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
4115 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4116 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4117 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4123 case AMDGPU::G_MERGE_VALUES:
4124 case AMDGPU::G_CONCAT_VECTORS: {
4126 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4127 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4129 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4131 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
4132 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4135 case AMDGPU::G_BITREVERSE:
4136 case AMDGPU::G_BITCAST:
4137 case AMDGPU::G_INTTOPTR:
4138 case AMDGPU::G_PTRTOINT:
4139 case AMDGPU::G_FABS:
4140 case AMDGPU::G_FNEG: {
4141 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4143 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4146 case AMDGPU::G_AMDGPU_FFBH_U32:
4147 case AMDGPU::G_AMDGPU_FFBL_B32:
4148 case AMDGPU::G_CTLZ_ZERO_UNDEF:
4149 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
4150 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4152 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4153 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
4156 case AMDGPU::G_CTPOP: {
4157 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4159 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4164 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4167 case AMDGPU::G_TRUNC: {
4173 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4174 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4177 case AMDGPU::G_ZEXT:
4178 case AMDGPU::G_SEXT:
4179 case AMDGPU::G_ANYEXT:
4180 case AMDGPU::G_SEXT_INREG: {
4189 switch (SrcBank->
getID()) {
4190 case AMDGPU::SGPRRegBankID:
4191 DstBank = AMDGPU::SGPRRegBankID;
4194 DstBank = AMDGPU::VGPRRegBankID;
4200 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4201 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
4205 case AMDGPU::G_IS_FPCLASS: {
4207 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4208 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4209 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4210 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4213 case AMDGPU::G_STORE: {
4215 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4220 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4221 OpdsMapping[0] = ValMapping;
4225 case AMDGPU::G_ICMP:
4226 case AMDGPU::G_FCMP: {
4227 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4232 AMDGPU::SGPRRegBankID);
4236 auto canUseSCCICMP = [&]() {
4239 return Size == 32 ||
4244 auto canUseSCCFCMP = [&]() {
4248 bool isICMP =
MI.getOpcode() == AMDGPU::G_ICMP;
4249 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4250 Op2Bank == AMDGPU::SGPRRegBankID &&
4251 Op3Bank == AMDGPU::SGPRRegBankID &&
4252 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4254 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4255 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4259 const unsigned ResultSize = 1;
4261 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4262 OpdsMapping[1] =
nullptr;
4263 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4264 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4267 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4270 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4271 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4272 unsigned IdxSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4274 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4276 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4277 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4280 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4283 case AMDGPU::G_INSERT_VECTOR_ELT: {
4285 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4287 unsigned VecSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4288 unsigned InsertSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4289 unsigned IdxSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4293 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4294 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4298 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4299 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4302 assert(InsertSize == 32 || InsertSize == 64);
4303 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4307 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4310 case AMDGPU::G_UNMERGE_VALUES: {
4315 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4317 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4321 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4322 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4323 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4324 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4325 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4326 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4327 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4328 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4329 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4330 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4331 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4332 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4333 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4334 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4335 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4336 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4337 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4356 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4357 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4358 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4359 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4360 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4361 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4362 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4363 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4364 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4365 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4366 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4367 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4368 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4369 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16:
4370 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4371 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4394 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4420 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4421 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4422 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4423 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4424 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4432 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4433 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4434 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4436 unsigned Size0 =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4437 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4440 case AMDGPU::G_INTRINSIC:
4441 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4445 case Intrinsic::amdgcn_div_fmas:
4446 case Intrinsic::amdgcn_div_fixup:
4447 case Intrinsic::amdgcn_trig_preop:
4448 case Intrinsic::amdgcn_sin:
4449 case Intrinsic::amdgcn_cos:
4450 case Intrinsic::amdgcn_log_clamp:
4451 case Intrinsic::amdgcn_rcp_legacy:
4452 case Intrinsic::amdgcn_rsq_legacy:
4453 case Intrinsic::amdgcn_rsq_clamp:
4454 case Intrinsic::amdgcn_fmul_legacy:
4455 case Intrinsic::amdgcn_fma_legacy:
4456 case Intrinsic::amdgcn_frexp_mant:
4457 case Intrinsic::amdgcn_frexp_exp:
4458 case Intrinsic::amdgcn_fract:
4459 case Intrinsic::amdgcn_cvt_pknorm_i16:
4460 case Intrinsic::amdgcn_cvt_pknorm_u16:
4461 case Intrinsic::amdgcn_cvt_pk_i16:
4462 case Intrinsic::amdgcn_cvt_pk_u16:
4463 case Intrinsic::amdgcn_fmed3:
4464 case Intrinsic::amdgcn_cubeid:
4465 case Intrinsic::amdgcn_cubema:
4466 case Intrinsic::amdgcn_cubesc:
4467 case Intrinsic::amdgcn_cubetc:
4468 case Intrinsic::amdgcn_sffbh:
4469 case Intrinsic::amdgcn_fmad_ftz:
4470 case Intrinsic::amdgcn_mbcnt_lo:
4471 case Intrinsic::amdgcn_mbcnt_hi:
4472 case Intrinsic::amdgcn_mul_u24:
4473 case Intrinsic::amdgcn_mul_i24:
4474 case Intrinsic::amdgcn_mulhi_u24:
4475 case Intrinsic::amdgcn_mulhi_i24:
4476 case Intrinsic::amdgcn_lerp:
4477 case Intrinsic::amdgcn_sad_u8:
4478 case Intrinsic::amdgcn_msad_u8:
4479 case Intrinsic::amdgcn_sad_hi_u8:
4480 case Intrinsic::amdgcn_sad_u16:
4481 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4482 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4483 case Intrinsic::amdgcn_mqsad_u32_u8:
4484 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4485 case Intrinsic::amdgcn_alignbyte:
4486 case Intrinsic::amdgcn_perm:
4487 case Intrinsic::amdgcn_fdot2:
4488 case Intrinsic::amdgcn_sdot2:
4489 case Intrinsic::amdgcn_udot2:
4490 case Intrinsic::amdgcn_sdot4:
4491 case Intrinsic::amdgcn_udot4:
4492 case Intrinsic::amdgcn_sdot8:
4493 case Intrinsic::amdgcn_udot8:
4494 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4495 case Intrinsic::amdgcn_fdot2_f16_f16:
4496 case Intrinsic::amdgcn_fdot2_f32_bf16:
4497 case Intrinsic::amdgcn_sudot4:
4498 case Intrinsic::amdgcn_sudot8:
4499 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4500 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4501 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4502 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4503 case Intrinsic::amdgcn_cvt_f32_fp8:
4504 case Intrinsic::amdgcn_cvt_f32_bf8:
4505 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4506 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4507 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4508 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4509 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4510 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4511 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4512 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4513 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4514 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4515 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4516 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4517 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4518 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4519 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4520 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4521 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4522 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4523 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4524 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4525 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4526 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4527 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4528 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4529 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4530 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4531 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4532 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4533 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4534 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4536 case Intrinsic::amdgcn_log:
4537 case Intrinsic::amdgcn_exp2:
4538 case Intrinsic::amdgcn_rcp:
4539 case Intrinsic::amdgcn_rsq:
4540 case Intrinsic::amdgcn_sqrt: {
4541 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4547 case Intrinsic::amdgcn_sbfe:
4548 case Intrinsic::amdgcn_ubfe:
4552 case Intrinsic::amdgcn_ds_swizzle:
4553 case Intrinsic::amdgcn_ds_permute:
4554 case Intrinsic::amdgcn_ds_bpermute:
4555 case Intrinsic::amdgcn_update_dpp:
4556 case Intrinsic::amdgcn_mov_dpp8:
4557 case Intrinsic::amdgcn_mov_dpp:
4558 case Intrinsic::amdgcn_strict_wwm:
4559 case Intrinsic::amdgcn_wwm:
4560 case Intrinsic::amdgcn_strict_wqm:
4561 case Intrinsic::amdgcn_wqm:
4562 case Intrinsic::amdgcn_softwqm:
4563 case Intrinsic::amdgcn_set_inactive:
4564 case Intrinsic::amdgcn_set_inactive_chain_arg:
4565 case Intrinsic::amdgcn_permlane64:
4567 case Intrinsic::amdgcn_cvt_pkrtz:
4571 case Intrinsic::amdgcn_kernarg_segment_ptr:
4572 case Intrinsic::amdgcn_s_getpc:
4573 case Intrinsic::amdgcn_groupstaticsize:
4574 case Intrinsic::amdgcn_reloc_constant:
4575 case Intrinsic::returnaddress: {
4576 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4577 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4580 case Intrinsic::amdgcn_wqm_vote: {
4581 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4582 OpdsMapping[0] = OpdsMapping[2]
4583 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4586 case Intrinsic::amdgcn_ps_live: {
4587 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4590 case Intrinsic::amdgcn_div_scale: {
4591 unsigned Dst0Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4592 unsigned Dst1Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4593 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4594 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4596 unsigned SrcSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4597 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4598 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4601 case Intrinsic::amdgcn_class: {
4602 Register Src0Reg =
MI.getOperand(2).getReg();
4603 Register Src1Reg =
MI.getOperand(3).getReg();
4604 unsigned Src0Size =
MRI.getType(Src0Reg).getSizeInBits();
4605 unsigned Src1Size =
MRI.getType(Src1Reg).getSizeInBits();
4606 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4607 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4608 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4609 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4612 case Intrinsic::amdgcn_icmp:
4613 case Intrinsic::amdgcn_fcmp: {
4614 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4616 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4617 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4618 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4619 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4622 case Intrinsic::amdgcn_readlane: {
4625 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4627 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4630 case Intrinsic::amdgcn_readfirstlane: {
4631 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4632 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4633 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4634 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4637 case Intrinsic::amdgcn_writelane: {
4638 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4640 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4643 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4645 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4649 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4650 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4651 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4654 case Intrinsic::amdgcn_if_break: {
4656 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4657 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4658 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4661 case Intrinsic::amdgcn_permlane16:
4662 case Intrinsic::amdgcn_permlanex16: {
4664 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4665 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4666 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4671 case Intrinsic::amdgcn_permlane16_var:
4672 case Intrinsic::amdgcn_permlanex16_var: {
4674 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4675 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4676 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4677 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4680 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
4681 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
4682 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
4683 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
4684 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
4685 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
4686 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
4687 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
4688 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
4689 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
4690 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
4691 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
4692 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
4693 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
4694 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
4695 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
4696 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
4697 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
4698 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
4699 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
4700 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
4701 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
4702 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
4703 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
4704 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
4705 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
4706 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
4707 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
4708 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
4709 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
4710 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
4711 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
4712 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
4713 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
4714 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
4715 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
4716 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
4717 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
4718 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: {
4727 Info->mayNeedAGPRs()
4733 Info->mayNeedAGPRs()
4738 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
4739 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
4740 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
4741 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
4742 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
4743 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
4744 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
4745 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
4746 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
4747 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
4748 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
4749 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
4750 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
4751 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: {
4760 case Intrinsic::amdgcn_interp_p1:
4761 case Intrinsic::amdgcn_interp_p2:
4762 case Intrinsic::amdgcn_interp_mov:
4763 case Intrinsic::amdgcn_interp_p1_f16:
4764 case Intrinsic::amdgcn_interp_p2_f16:
4765 case Intrinsic::amdgcn_lds_param_load: {
4766 const int M0Idx =
MI.getNumOperands() - 1;
4767 Register M0Reg =
MI.getOperand(M0Idx).getReg();
4769 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4771 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4772 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
4773 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4777 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
4780 case Intrinsic::amdgcn_interp_inreg_p10:
4781 case Intrinsic::amdgcn_interp_inreg_p2:
4782 case Intrinsic::amdgcn_interp_inreg_p10_f16:
4783 case Intrinsic::amdgcn_interp_inreg_p2_f16:
4784 case Intrinsic::amdgcn_interp_p10_rtz_f16:
4785 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
4786 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4787 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4788 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4789 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4790 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4793 case Intrinsic::amdgcn_ballot: {
4794 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4795 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4796 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4797 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
4800 case Intrinsic::amdgcn_inverse_ballot: {
4802 Register MaskReg =
MI.getOperand(2).getReg();
4803 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
4804 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4805 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4806 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4809 case Intrinsic::amdgcn_s_quadmask:
4810 case Intrinsic::amdgcn_s_wqm: {
4811 Register MaskReg =
MI.getOperand(2).getReg();
4812 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
4813 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4814 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
4815 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4818 case Intrinsic::amdgcn_wave_reduce_umin:
4819 case Intrinsic::amdgcn_wave_reduce_umax: {
4820 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4821 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4822 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4825 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
4828 case Intrinsic::amdgcn_s_bitreplicate:
4829 Register MaskReg =
MI.getOperand(2).getReg();
4830 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4831 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
4832 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
4836 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4837 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4838 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4839 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4842 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
4849 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
4850 unsigned N =
MI.getNumExplicitOperands() - 2;
4851 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
4855 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4858 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4861 for (
unsigned I = 2;
I <
N; ++
I) {
4862 unsigned Size =
MRI.getType(
MI.getOperand(
I).getReg()).getSizeInBits();
4863 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4868 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
4869 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
4870 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
4872 case Intrinsic::amdgcn_s_getreg:
4873 case Intrinsic::amdgcn_s_memtime:
4874 case Intrinsic::amdgcn_s_memrealtime:
4875 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
4876 case Intrinsic::amdgcn_s_sendmsg_rtn: {
4877 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4878 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4881 case Intrinsic::amdgcn_global_atomic_fadd:
4882 case Intrinsic::amdgcn_global_atomic_csub:
4883 case Intrinsic::amdgcn_global_atomic_fmin:
4884 case Intrinsic::amdgcn_global_atomic_fmax:
4885 case Intrinsic::amdgcn_global_atomic_fmin_num:
4886 case Intrinsic::amdgcn_global_atomic_fmax_num:
4887 case Intrinsic::amdgcn_flat_atomic_fadd:
4888 case Intrinsic::amdgcn_flat_atomic_fmin:
4889 case Intrinsic::amdgcn_flat_atomic_fmax:
4890 case Intrinsic::amdgcn_flat_atomic_fmin_num:
4891 case Intrinsic::amdgcn_flat_atomic_fmax_num:
4892 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
4893 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
4894 case Intrinsic::amdgcn_atomic_cond_sub_u32:
4895 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
4896 case Intrinsic::amdgcn_global_load_tr_b64:
4897 case Intrinsic::amdgcn_global_load_tr_b128:
4899 case Intrinsic::amdgcn_ds_ordered_add:
4900 case Intrinsic::amdgcn_ds_ordered_swap:
4901 case Intrinsic::amdgcn_ds_fadd_v2bf16: {
4902 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4903 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4905 AMDGPU::SGPRRegBankID);
4906 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
4907 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4910 case Intrinsic::amdgcn_ds_append:
4911 case Intrinsic::amdgcn_ds_consume: {
4912 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4913 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4917 case Intrinsic::amdgcn_exp_compr:
4918 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4919 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4921 case Intrinsic::amdgcn_exp:
4923 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4924 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4925 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4926 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4928 case Intrinsic::amdgcn_exp_row:
4929 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4930 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4931 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4932 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4935 case Intrinsic::amdgcn_s_sendmsg:
4936 case Intrinsic::amdgcn_s_sendmsghalt: {
4939 AMDGPU::SGPRRegBankID);
4940 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4943 case Intrinsic::amdgcn_s_setreg: {
4946 AMDGPU::SGPRRegBankID);
4947 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4950 case Intrinsic::amdgcn_s_ttracedata: {
4954 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
4957 case Intrinsic::amdgcn_end_cf: {
4959 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4962 case Intrinsic::amdgcn_else: {
4964 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4965 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4966 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4969 case Intrinsic::amdgcn_live_mask: {
4970 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4973 case Intrinsic::amdgcn_wqm_demote:
4974 case Intrinsic::amdgcn_kill: {
4975 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4978 case Intrinsic::amdgcn_raw_buffer_load:
4979 case Intrinsic::amdgcn_raw_ptr_buffer_load:
4980 case Intrinsic::amdgcn_raw_tbuffer_load:
4981 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
4990 case Intrinsic::amdgcn_raw_buffer_load_lds:
4991 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
4998 case Intrinsic::amdgcn_raw_buffer_store:
4999 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5000 case Intrinsic::amdgcn_raw_buffer_store_format:
5001 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5002 case Intrinsic::amdgcn_raw_tbuffer_store:
5003 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5010 case Intrinsic::amdgcn_struct_buffer_load:
5011 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5012 case Intrinsic::amdgcn_struct_tbuffer_load:
5013 case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
5021 case Intrinsic::amdgcn_struct_buffer_load_lds:
5022 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
5030 case Intrinsic::amdgcn_struct_buffer_store:
5031 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5032 case Intrinsic::amdgcn_struct_tbuffer_store:
5033 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5041 case Intrinsic::amdgcn_init_exec_from_input: {
5043 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5046 case Intrinsic::amdgcn_ds_gws_init:
5047 case Intrinsic::amdgcn_ds_gws_barrier:
5048 case Intrinsic::amdgcn_ds_gws_sema_br: {
5049 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5053 AMDGPU::SGPRRegBankID);
5054 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5057 case Intrinsic::amdgcn_ds_gws_sema_v:
5058 case Intrinsic::amdgcn_ds_gws_sema_p:
5059 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5062 AMDGPU::SGPRRegBankID);
5063 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5066 case Intrinsic::amdgcn_global_load_lds: {
5071 case Intrinsic::amdgcn_lds_direct_load: {
5072 const int M0Idx =
MI.getNumOperands() - 1;
5073 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5075 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5077 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5078 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5079 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5083 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5086 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5087 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5091 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
5104 case Intrinsic::amdgcn_s_sleep_var:
5107 case Intrinsic::amdgcn_s_barrier_signal_var:
5108 case Intrinsic::amdgcn_s_barrier_join:
5109 case Intrinsic::amdgcn_s_wakeup_barrier:
5112 case Intrinsic::amdgcn_s_barrier_init:
5116 case Intrinsic::amdgcn_s_barrier_signal_isfirst_var: {
5117 const unsigned ResultSize = 1;
5119 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5123 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
5124 case Intrinsic::amdgcn_s_barrier_leave: {
5125 const unsigned ResultSize = 1;
5127 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5130 case Intrinsic::amdgcn_s_get_barrier_state: {
5140 case AMDGPU::G_SELECT: {
5141 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5143 AMDGPU::SGPRRegBankID);
5145 AMDGPU::SGPRRegBankID);
5146 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5147 Op3Bank == AMDGPU::SGPRRegBankID;
5149 unsigned CondBankDefault = SGPRSrcs ?
5150 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5153 if (CondBank == AMDGPU::SGPRRegBankID)
5154 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5155 else if (CondBank == AMDGPU::VGPRRegBankID)
5156 CondBank = AMDGPU::VCCRegBankID;
5158 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5159 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5161 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5165 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5166 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5167 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5168 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5170 OpdsMapping[0] = AMDGPU::getValueMapping(Bank,
Size);
5171 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5172 OpdsMapping[2] = AMDGPU::getValueMapping(Bank,
Size);
5173 OpdsMapping[3] = AMDGPU::getValueMapping(Bank,
Size);
5179 case AMDGPU::G_SI_CALL: {
5180 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5186 for (
unsigned I = 4;
I <
MI.getNumOperands(); ++
I) {
5187 if (
MI.getOperand(
I).isReg()) {
5191 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5196 case AMDGPU::G_LOAD:
5197 case AMDGPU::G_ZEXTLOAD:
5198 case AMDGPU::G_SEXTLOAD:
5201 case AMDGPU::G_ATOMICRMW_XCHG:
5202 case AMDGPU::G_ATOMICRMW_ADD:
5203 case AMDGPU::G_ATOMICRMW_SUB:
5204 case AMDGPU::G_ATOMICRMW_AND:
5205 case AMDGPU::G_ATOMICRMW_OR:
5206 case AMDGPU::G_ATOMICRMW_XOR:
5207 case AMDGPU::G_ATOMICRMW_MAX:
5208 case AMDGPU::G_ATOMICRMW_MIN:
5209 case AMDGPU::G_ATOMICRMW_UMAX:
5210 case AMDGPU::G_ATOMICRMW_UMIN:
5211 case AMDGPU::G_ATOMICRMW_FADD:
5212 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5213 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5214 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
5215 case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
5216 case AMDGPU::G_AMDGPU_ATOMIC_FMAX: {
5222 case AMDGPU::G_ATOMIC_CMPXCHG: {
5229 case AMDGPU::G_BRCOND: {
5231 AMDGPU::SGPRRegBankID);
5232 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
5233 if (Bank != AMDGPU::SGPRRegBankID)
5234 Bank = AMDGPU::VCCRegBankID;
5236 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
5239 case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
5240 case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
5242 case AMDGPU::G_PREFETCH:
5249 MI.getNumOperands());
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static bool substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)
static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
static Register constrainRegToBank(MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &Reg, const RegisterBank &Bank)
static std::pair< Register, Register > unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode)
static void extendLow32IntoHigh32(MachineIRBuilder &B, Register Hi32Reg, Register Lo32Reg, unsigned ExtOpc, const RegisterBank &RegBank, bool IsBooleanSrc=false)
Implement extending a 32-bit value to a 64-bit value.
static unsigned getExtendOp(unsigned Opc)
static bool isVectorRegisterBank(const RegisterBank &Bank)
static unsigned regBankUnion(unsigned RB0, unsigned RB1)
static std::pair< LLT, LLT > splitUnequalType(LLT Ty, unsigned FirstSize)
Split Ty into 2 pieces.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, unsigned ConstOffset)
Utility function for pushing dynamic vector indexes with a constant offset into waterfall loops.
static LLT widen96To128(LLT Ty)
static LLT getHalfSizedType(LLT Ty)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Returns the sub-type a function will return at a given Idx. This should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static bool isReg(const MCInst &MI, unsigned OpNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isUniformMMO(const MachineMemOperand *MMO)
bool applyMappingDynStackAlloc(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
std::pair< Register, unsigned > splitBufferOffsets(MachineIRBuilder &B, Register Offset) const
bool collectWaterfallOperands(SmallSet< Register, 4 > &SGPROperandRegs, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef< unsigned > OpIndices) const
const InstructionMapping & getImageMapping(const MachineRegisterInfo &MRI, const MachineInstr &MI, int RsrcIdx) const
InstructionMappings addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI, const std::array< unsigned, NumOps > RegSrcOpIdx, ArrayRef< OpRegBankEntry< NumOps > > Table) const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const
bool executeInWaterfallLoop(MachineIRBuilder &B, iterator_range< MachineBasicBlock::iterator > Range, SmallSet< Register, 4 > &SGPROperandRegs) const
Legalize instruction MI where operands in OpIndices must be SGPRs.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
AMDGPURegisterBankInfo(const GCNSubtarget &STI)
bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI, unsigned Default=AMDGPU::VGPRRegBankID) const
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const
Handle register layout difference for f16 images for some subtargets.
const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const
void applyMappingImpl(MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const override
See RegisterBankInfo::applyMapping.
bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper, bool Signed) const
bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI, const OperandsMapper &OpdMapper, int RSrcIdx) const
const ValueMapping * getVGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool isScalarLoadLegal(const MachineInstr &MI) const
unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg, Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const
const ValueMapping * getSGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
void split64BitValueForMapping(MachineIRBuilder &B, SmallVector< Register, 2 > &Regs, LLT HalfTy, Register Reg) const
Split 64-bit value Reg into two 32-bit halves and populate them into Regs.
const ValueMapping * getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const
Return the mapping for a pointer argument.
unsigned getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsic(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isDivergentRegBank(const RegisterBank *RB) const override
Returns true if the register bank is considered divergent.
void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const InstructionMapping & getDefaultMappingSOP(const MachineInstr &MI) const
const InstructionMapping & getDefaultMappingAllVGPR(const MachineInstr &MI) const
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
const ValueMapping * getAGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
const GCNSubtarget & Subtarget
const InstructionMapping & getDefaultMappingVOP(const MachineInstr &MI) const
bool isSALUMapping(const MachineInstr &MI) const
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const
bool applyMappingSBufferLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
void applyMappingSMULU64(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
const SIRegisterInfo * TRI
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool hasScalarCompareEq64() const
bool hasScalarSubwordLoads() const
bool hasFullRate64Ops() const
bool hasScalarDwordx3Loads() const
bool hasScalarMulHiInsts() const
bool hasPseudoScalarTrans() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasSALUFloatInsts() const
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void createdInstr(MachineInstr &MI)=0
An instruction has been created and inserted into the function.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT getScalarType() const
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
constexpr LLT divide(int Factor) const
Return a type that is Factor times smaller.
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
TypeSize getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value,.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Helper class that represents how the value of an instruction may be mapped and what is the related co...
bool isValid() const
Check whether this object is valid.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
const InstructionMapping & getInstrMapping() const
The final mapping of the instruction.
MachineInstr & getMI() const
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
const unsigned * Sizes
Hold the sizes of the register banks for all HwModes.
bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, TypeSize Size) const
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static IntegerType * getInt32Ty(LLVMContext &C)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
self_iterator getIterator()
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isFlatGlobalAddrSpace(unsigned AS)
unsigned getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelKnownBits *KnownBits=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
auto reverse(ContainerTy &&C)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
const RegisterBank * RegBank
Register bank where the partial value lives.
unsigned Length
Length of this mapping in bits.
Helper struct that represents how a value is mapped through different register banks.
unsigned NumBreakDowns
Number of partial mapping to break down this value.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
The llvm::once_flag structure.