84#include "llvm/IR/IntrinsicsAMDGPU.h"
86#define GET_TARGET_REGBANK_IMPL
87#include "AMDGPUGenRegisterBank.inc"
90#include "AMDGPUGenRegisterBankInfo.def"
93using namespace MIPatternMatch;
109 :
B(B), RBI(RBI_),
MRI(MRI_), NewBank(RB) {
110 assert(!B.isObservingChanges());
111 B.setChangeObserver(*
this);
114 ~ApplyRegBankMapping()
override {
118 B.stopObservingChanges();
123 const unsigned Opc =
MI.getOpcode();
124 if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
125 Opc == AMDGPU::G_SEXT) {
132 if (SrcBank == &AMDGPU::VCCRegBank) {
136 assert(NewBank == &AMDGPU::VGPRRegBank);
140 B.setInsertPt(*
MI.getParent(),
MI);
142 auto True = B.buildConstant(
S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
143 auto False = B.buildConstant(
S32, 0);
144 B.buildSelect(DstReg, SrcReg, True, False);
145 MRI.setRegBank(True.getReg(0), *NewBank);
146 MRI.setRegBank(False.getReg(0), *NewBank);
147 MI.eraseFromParent();
150 assert(!
MRI.getRegClassOrRegBank(DstReg));
151 MRI.setRegBank(DstReg, *NewBank);
156 if (Opc == AMDGPU::G_TRUNC) {
159 assert(DstBank != &AMDGPU::VCCRegBank);
169 if (Reg.isPhysical() ||
MRI.getRegClassOrRegBank(Reg))
174 assert(NewBank == &AMDGPU::VGPRRegBank &&
175 "s1 operands should only be used for vector bools");
176 assert((
MI.getOpcode() != AMDGPU::G_TRUNC &&
177 MI.getOpcode() != AMDGPU::G_ANYEXT) &&
178 "not expecting legalization artifacts here");
179 RB = &AMDGPU::VCCRegBank;
182 MRI.setRegBank(Reg, *RB);
205 : Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
206 TII(Subtarget.getInstrInfo()) {
211 static auto InitializeRegisterBankOnce = [
this]() {
213 &
getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
214 &
getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
218 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
222 unsigned BankID = Bank.
getID();
223 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
227 return RB != &AMDGPU::SGPRRegBank;
234 if (Dst.getID() == AMDGPU::SGPRRegBankID &&
236 return std::numeric_limits<unsigned>::max();
247 (Dst.getID() == AMDGPU::SGPRRegBankID) &&
249 Src.getID() == AMDGPU::SGPRRegBankID ||
250 Src.getID() == AMDGPU::VCCRegBankID))
251 return std::numeric_limits<unsigned>::max();
254 if (Dst.getID() == AMDGPU::AGPRRegBankID &&
255 Src.getID() == AMDGPU::AGPRRegBankID)
289 if (&RC == &AMDGPU::SReg_1RegClass)
290 return AMDGPU::VCCRegBank;
299 return AMDGPU::SGPRRegBank;
301 return Ty ==
LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
304 return TRI->
isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
307template <
unsigned NumOps>
311 const std::array<unsigned, NumOps> RegSrcOpIdx,
318 unsigned Sizes[NumOps];
319 for (
unsigned I = 0;
I < NumOps; ++
I) {
320 Register Reg =
MI.getOperand(RegSrcOpIdx[
I]).getReg();
324 for (
unsigned I = 0, E =
MI.getNumExplicitDefs();
I != E; ++
I) {
326 Operands[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
330 unsigned MappingID = 2;
331 for (
const auto &Entry : Table) {
332 for (
unsigned I = 0;
I < NumOps; ++
I) {
333 int OpIdx = RegSrcOpIdx[
I];
334 Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[
I],
Sizes[
I]);
349 case Intrinsic::amdgcn_readlane: {
352 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
355 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
358 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
359 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
361 case Intrinsic::amdgcn_writelane: {
364 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
367 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
370 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
373 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
377 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
378 return addMappingFromTable<4>(
MI,
MRI, RegSrcOpIdx, Table);
390 case Intrinsic::amdgcn_s_buffer_load: {
393 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
396 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
399 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
402 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
406 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
407 return addMappingFromTable<2>(
MI,
MRI, RegSrcOpIdx, Table);
409 case Intrinsic::amdgcn_ds_ordered_add:
410 case Intrinsic::amdgcn_ds_ordered_swap: {
414 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
417 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
420 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
421 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
423 case Intrinsic::amdgcn_s_sendmsg:
424 case Intrinsic::amdgcn_s_sendmsghalt: {
428 { { AMDGPU::SGPRRegBankID }, 1 },
431 { { AMDGPU::VGPRRegBankID }, 3 }
434 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
435 return addMappingFromTable<1>(
MI,
MRI, RegSrcOpIdx, Table);
445 if (!
MI.hasOneMemOperand())
478 switch (
MI.getOpcode()) {
479 case TargetOpcode::G_CONSTANT:
480 case TargetOpcode::G_IMPLICIT_DEF: {
484 { { AMDGPU::VGPRRegBankID }, 1 },
485 { { AMDGPU::SGPRRegBankID }, 1 },
486 { { AMDGPU::VCCRegBankID }, 1 }
489 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
494 case TargetOpcode::G_FCONSTANT:
495 case TargetOpcode::G_FRAME_INDEX:
496 case TargetOpcode::G_GLOBAL_VALUE: {
498 { { AMDGPU::VGPRRegBankID }, 1 },
499 { { AMDGPU::SGPRRegBankID }, 1 }
502 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
504 case TargetOpcode::G_AND:
505 case TargetOpcode::G_OR:
506 case TargetOpcode::G_XOR: {
513 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
514 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
515 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
521 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
522 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
523 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size)}),
534 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
535 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
536 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
542 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
543 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
544 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
549 case TargetOpcode::G_LOAD:
550 case TargetOpcode::G_ZEXTLOAD:
551 case TargetOpcode::G_SEXTLOAD: {
553 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
562 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
563 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
571 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
572 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
585 case TargetOpcode::G_SELECT: {
589 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
590 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
591 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
597 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
598 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
599 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
605 case TargetOpcode::G_UADDE:
606 case TargetOpcode::G_USUBE:
607 case TargetOpcode::G_SADDE:
608 case TargetOpcode::G_SSUBE: {
612 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
613 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
614 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
615 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
616 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
622 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
623 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
624 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
625 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
630 case AMDGPU::G_BRCOND: {
631 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
636 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr}),
642 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr }),
647 case AMDGPU::G_INTRINSIC:
648 case AMDGPU::G_INTRINSIC_CONVERGENT:
650 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
651 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
666 Register LoLHS =
MRI->createGenericVirtualRegister(HalfTy);
667 Register HiLHS =
MRI->createGenericVirtualRegister(HalfTy);
669 MRI->setRegBank(LoLHS, *Bank);
670 MRI->setRegBank(HiLHS, *Bank);
675 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
686 MRI.setType(Reg, NewTy);
706 LLT Ty =
MRI.getType(Src);
709 if (Bank == &AMDGPU::SGPRRegBank)
715 if (Bank != &AMDGPU::VGPRRegBank) {
717 Src =
B.buildCopy(Ty, Src).getReg(0);
718 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
722 unsigned NumParts = Bits / 32;
729 auto Unmerge =
B.buildUnmerge(
S32, Src);
730 for (
unsigned i = 0; i < NumParts; ++i)
734 for (
unsigned i = 0; i < NumParts; ++i) {
736 Register DstPart =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
737 MRI.setType(DstPart, NumParts == 1 ? Ty :
S32);
742 assert(Constrained &&
"Failed to constrain readfirstlane src reg");
744 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
752 Register Dst =
B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
753 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
786 const unsigned MovExecOpc =
788 const unsigned MovExecTermOpc =
792 AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
794 AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
796 AMDGPU::EXEC_LO : AMDGPU::EXEC;
799 const int OrigRangeSize = std::distance(
Range.begin(),
Range.end());
803 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
804 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
807 B.buildInstr(TargetOpcode::IMPLICIT_DEF)
808 .addDef(InitSaveExecReg);
810 Register PhiExec =
MRI.createVirtualRegister(WaveRC);
811 Register NewExec =
MRI.createVirtualRegister(WaveRC);
837 B.setInsertPt(*LoopBB, LoopBB->
end());
839 B.buildInstr(TargetOpcode::PHI)
841 .addReg(InitSaveExecReg)
856 auto NewEnd = BodyBB->
end();
863 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
868 if (!SGPROperandRegs.
count(OldReg))
873 auto OldVal = WaterfalledRegMap.
find(OldReg);
874 if (OldVal != WaterfalledRegMap.
end()) {
875 Op.setReg(OldVal->second);
880 LLT OpTy =
MRI.getType(OpReg);
883 if (OpBank != &AMDGPU::VGPRRegBank) {
886 OpReg =
B.buildCopy(OpTy, OpReg).getReg(0);
887 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
895 bool Is64 = OpSize % 64 == 0;
896 unsigned PartSize = Is64 ? 64 : 32;
898 unsigned NumParts = OpSize / PartSize;
904 CurrentLaneParts.
push_back(CurrentLaneReg);
906 auto UnmergeOp =
B.buildUnmerge(PartTy, OpReg);
907 auto UnmergeCurrentLane =
B.buildUnmerge(PartTy, CurrentLaneReg);
908 for (
unsigned i = 0; i < NumParts; ++i) {
910 CurrentLaneParts.
push_back(UnmergeCurrentLane.getReg(i));
911 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
912 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
916 for (
unsigned i = 0; i < NumParts; ++i) {
918 OpParts[i]).getReg(0);
919 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
924 CondReg =
B.buildAnd(
S1, CondReg, CmpReg).getReg(0);
925 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
929 Op.setReg(CurrentLaneReg);
932 WaterfalledRegMap.
insert(std::pair(OldReg,
Op.getReg()));
937 CondReg =
B.buildIntrinsic(Intrinsic::amdgcn_ballot,
941 MRI.setRegClass(CondReg, WaveRC);
944 B.buildInstr(AndSaveExecOpc)
948 MRI.setSimpleHint(NewExec, CondReg);
950 B.setInsertPt(*BodyBB, BodyBB->
end());
953 B.buildInstr(XorTermOpc)
962 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
969 B.setMBB(*RestoreExecBB);
970 B.buildInstr(MovExecTermOpc)
972 .addReg(SaveExecReg);
976 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
988 for (
unsigned Op : OpIndices) {
992 if (OpBank->
getID() != AMDGPU::SGPRRegBankID)
993 SGPROperandRegs.
insert(Reg);
997 return !SGPROperandRegs.
empty();
1017 Register Reg =
MI.getOperand(OpIdx).getReg();
1020 if (Bank == &AMDGPU::SGPRRegBank)
1024 MI.getOperand(OpIdx).setReg(Reg);
1036 assert(FirstSize % EltSize == 0);
1038 unsigned FirstPartNumElts = FirstSize / EltSize;
1039 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1060 const LLT LoadTy =
MRI.getType(DstReg);
1062 const unsigned MaxNonSmrdLoadSize = 128;
1066 if (DstBank == &AMDGPU::SGPRRegBank) {
1078 if (LoadSize == 32 &&
1082 if (LoadSize == 32 &&
1091 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
1093 if (LoadSize == 32) {
1097 if (
MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
1099 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1100 B.buildSExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1101 }
else if (
MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
1103 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1104 B.buildZExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1107 B.buildLoadFromOffset(
MI.getOperand(0), PtrReg, *MMO, 0);
1121 auto WideLoad =
B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
1123 B.buildTrunc(
MI.getOperand(0), WideLoad);
1125 B.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(),
1130 MI.eraseFromParent();
1135 if (LoadSize <= MaxNonSmrdLoadSize)
1141 if (SrcRegs.
empty())
1144 assert(LoadSize % MaxNonSmrdLoadSize == 0);
1149 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
1150 MRI.setType(BasePtrReg, PtrTy);
1152 unsigned NumSplitParts = LoadTy.
getSizeInBits() / MaxNonSmrdLoadSize;
1153 const LLT LoadSplitTy = LoadTy.
divide(NumSplitParts);
1154 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1165 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
1176 const auto &TFI = *ST.getFrameLowering();
1184 Register AllocSize =
MI.getOperand(1).getReg();
1190 if (SizeBank != &AMDGPU::SGPRRegBank)
1193 LLT PtrTy =
MRI.getType(Dst);
1198 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1200 auto WaveSize =
B.buildConstant(
LLT::scalar(32), ST.getWavefrontSizeLog2());
1201 auto ScaledSize =
B.buildShl(IntPtrTy, AllocSize, WaveSize);
1203 auto SPCopy =
B.buildCopy(PtrTy, SPReg);
1204 if (Alignment > TFI.getStackAlign()) {
1205 auto PtrAdd =
B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
1206 B.buildMaskLowPtrBits(Dst, PtrAdd,
1207 Log2(Alignment) + ST.getWavefrontSizeLog2());
1209 B.buildPtrAdd(Dst, SPCopy, ScaledSize);
1212 MI.eraseFromParent();
1219 int RsrcIdx)
const {
1220 const int NumDefs =
MI.getNumExplicitDefs();
1224 RsrcIdx += NumDefs + 1;
1231 for (
int I = NumDefs, NumOps =
MI.getNumOperands();
I != NumOps; ++
I) {
1232 if (!
MI.getOperand(
I).isReg())
1236 if (
I == RsrcIdx ||
I == RsrcIdx + 1)
1248 Register &SOffsetReg, int64_t &InstOffsetVal,
Align Alignment)
const {
1252 if (std::optional<int64_t> Imm =
1256 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1257 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1258 InstOffsetVal = ImmOffset;
1260 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1261 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1262 return SOffset + ImmOffset;
1277 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1278 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1279 InstOffsetVal = ImmOffset;
1285 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1286 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1288 InstOffsetVal = ImmOffset;
1302 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
1308 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
1318 VOffsetReg = CombinedOffset;
1320 VOffsetReg =
B.buildCopy(
S32, CombinedOffset).getReg(0);
1321 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1324 SOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1325 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1336 LLT Ty =
MRI.getType(Dst);
1342 if (RSrcBank == &AMDGPU::SGPRRegBank &&
1343 OffsetBank == &AMDGPU::SGPRRegBank)
1351 if (LoadSize == 256 || LoadSize == 512) {
1352 NumLoads = LoadSize / 128;
1353 Ty = Ty.
divide(NumLoads);
1358 const Align Alignment = NumLoads > 1 ?
Align(16 * NumLoads) :
Align(1);
1364 int64_t ImmOffset = 0;
1367 SOffset, ImmOffset, Alignment);
1372 const Align MemAlign(4);
1386 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
1393 for (
int i = 0; i < NumLoads; ++i) {
1394 if (NumLoads == 1) {
1397 LoadParts[i] =
MRI.createGenericVirtualRegister(Ty);
1398 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
1405 B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
1406 .addDef(LoadParts[i])
1411 .addImm(ImmOffset + 16 * i)
1414 .addMemOperand(MMO);
1420 if (RSrcBank != &AMDGPU::SGPRRegBank) {
1423 B.setInstr(*Span.
begin());
1424 MI.eraseFromParent();
1428 OpsToWaterfall.
insert(RSrc);
1433 if (NumLoads != 1) {
1435 B.buildConcatVectors(Dst, LoadParts);
1437 B.buildMergeLikeInstr(Dst, LoadParts);
1441 if (RSrcBank == &AMDGPU::SGPRRegBank)
1442 MI.eraseFromParent();
1457 LLT Ty =
MRI.getType(DstReg);
1461 unsigned FirstOpnd = isa<GIntrinsic>(
MI) ? 2 : 1;
1462 Register SrcReg =
MI.getOperand(FirstOpnd).getReg();
1463 Register OffsetReg =
MI.getOperand(FirstOpnd + 1).getReg();
1464 Register WidthReg =
MI.getOperand(FirstOpnd + 2).getReg();
1468 if (DstBank == &AMDGPU::VGPRRegBank) {
1474 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1478 auto ShiftOffset =
Signed ?
B.buildAShr(
S64, SrcReg, OffsetReg)
1479 :
B.buildLShr(
S64, SrcReg, OffsetReg);
1480 auto UnmergeSOffset =
B.buildUnmerge({
S32,
S32}, ShiftOffset);
1487 auto Zero =
B.buildConstant(
S32, 0);
1488 auto WidthImm = ConstWidth->Value.getZExtValue();
1489 if (WidthImm <= 32) {
1493 Signed ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
1494 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
1496 Signed ?
B.buildAShr(
S32, Extract,
B.buildConstant(
S32, 31)) : Zero;
1497 B.buildMergeLikeInstr(DstReg, {Extract, Extend});
1501 auto UpperWidth =
B.buildConstant(
S32, WidthImm - 32);
1504 ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
1505 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
1506 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
1508 MI.eraseFromParent();
1514 auto ExtShift =
B.buildSub(
S32,
B.buildConstant(
S32, 64), WidthReg);
1515 auto SignBit =
B.buildShl(
S64, ShiftOffset, ExtShift);
1517 B.buildAShr(
S64, SignBit, ExtShift);
1519 B.buildLShr(
S64, SignBit, ExtShift);
1520 MI.eraseFromParent();
1526 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1529 auto OffsetMask =
B.buildConstant(
S32, maskTrailingOnes<unsigned>(6));
1530 auto ClampOffset =
B.buildAnd(
S32, OffsetReg, OffsetMask);
1533 auto ShiftWidth =
B.buildShl(
S32, WidthReg,
B.buildConstant(
S32, 16));
1538 auto MergedInputs =
B.buildOr(
S32, ClampOffset, ShiftWidth);
1542 unsigned Opc = Ty ==
S32 ? (
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1543 (
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1545 auto MIB =
B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1549 MI.eraseFromParent();
1567 if (
MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
1570 bool IsUnsigned =
MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
1574 bool DstOnValu =
MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
1575 bool Accumulate =
true;
1584 Register DstLo =
B.buildMul(
S32, Src0, Src1).getReg(0);
1585 bool MulHiInVgpr =
false;
1587 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
1590 DstHi = IsUnsigned ?
B.buildUMulH(
S32, Src0, Src1).getReg(0)
1591 :
B.buildSMulH(
S32, Src0, Src1).getReg(0);
1592 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
1597 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
1598 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
1600 DstHi = IsUnsigned ?
B.buildUMulH(
S32, VSrc0, VSrc1).getReg(0)
1601 :
B.buildSMulH(
S32, VSrc0, VSrc1).getReg(0);
1602 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1618 LLT CarryType = DstOnValu ?
S1 :
S32;
1620 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
1622 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
1627 Zero =
B.buildConstant(
S32, 0).getReg(0);
1628 MRI.setRegBank(Zero,
1629 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
1633 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
1634 : AMDGPU::SGPRRegBank);
1636 if (DstOnValu && !MulHiInVgpr) {
1637 Carry =
B.buildTrunc(
S1, Carry).getReg(0);
1638 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
1644 DstLo =
B.buildCopy(
S32, DstLo).getReg(0);
1645 DstHi =
B.buildCopy(
S32, DstHi).getReg(0);
1646 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
1647 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1650 auto Unmerge =
B.buildUnmerge(
S32, Src2);
1651 Register Src2Lo = Unmerge.getReg(0);
1652 Register Src2Hi = Unmerge.getReg(1);
1653 MRI.setRegBank(Src2Lo, DstBank);
1654 MRI.setRegBank(Src2Hi, DstBank);
1658 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
1660 Carry =
B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
1661 MRI.setRegBank(Carry, CarryBank);
1664 auto AddLo =
B.buildUAddo(
S32, CarryType, DstLo, Src2Lo);
1665 DstLo = AddLo.getReg(0);
1666 Register CarryLo = AddLo.getReg(1);
1667 MRI.setRegBank(DstLo, DstBank);
1668 MRI.setRegBank(CarryLo, CarryBank);
1670 auto AddHi =
B.buildUAdde(
S32, CarryType, DstHi, Src2Hi, CarryLo);
1671 DstHi = AddHi.getReg(0);
1672 MRI.setRegBank(DstHi, DstBank);
1674 Register CarryHi = AddHi.getReg(1);
1675 MRI.setRegBank(CarryHi, CarryBank);
1680 Carry =
B.buildXor(CarryType, Carry, CarryHi).getReg(0);
1681 MRI.setRegBank(Carry, CarryBank);
1685 Carry =
B.buildConstant(CarryType, 0).getReg(0);
1686 MRI.setRegBank(Carry, CarryBank);
1690 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
1693 B.buildCopy(Dst1, Carry);
1695 B.buildTrunc(Dst1, Carry);
1698 MI.eraseFromParent();
1705 case TargetOpcode::G_ASHR:
1706 case TargetOpcode::G_SMIN:
1707 case TargetOpcode::G_SMAX:
1708 return TargetOpcode::G_SEXT;
1709 case TargetOpcode::G_LSHR:
1710 case TargetOpcode::G_UMIN:
1711 case TargetOpcode::G_UMAX:
1712 return TargetOpcode::G_ZEXT;
1714 return TargetOpcode::G_ANYEXT;
1720static std::pair<Register, Register>
1723 auto Bitcast =
B.buildBitcast(
S32, Src);
1725 if (ExtOpcode == TargetOpcode::G_SEXT) {
1726 auto ExtLo =
B.buildSExtInReg(
S32, Bitcast, 16);
1727 auto ShiftHi =
B.buildAShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1728 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1731 auto ShiftHi =
B.buildLShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1732 if (ExtOpcode == TargetOpcode::G_ZEXT) {
1733 auto ExtLo =
B.buildAnd(
S32, Bitcast,
B.buildConstant(
S32, 0xffff));
1734 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1737 assert(ExtOpcode == TargetOpcode::G_ANYEXT);
1738 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
1746 if (!SrcReg.
empty()) {
1763 LLT StoreVT =
MRI.getType(Reg);
1767 auto Unmerge =
B.buildUnmerge(
S16, Reg);
1771 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
1781static std::pair<Register, unsigned>
1785 return std::pair(
Register(), Const);
1789 return std::pair(
Base, Const);
1792 return std::pair(Reg, 0);
1795std::pair<Register, unsigned>
1808 if (ImmOffset != 0) {
1817 unsigned Overflow = ImmOffset & ~MaxImm;
1818 ImmOffset -= Overflow;
1819 if ((int32_t)Overflow < 0) {
1820 Overflow += ImmOffset;
1825 if (Overflow != 0) {
1827 BaseReg =
B.buildConstant(
S32, Overflow).getReg(0);
1829 auto OverflowVal =
B.buildConstant(
S32, Overflow);
1830 BaseReg =
B.buildAdd(
S32, BaseReg, OverflowVal).getReg(0);
1836 BaseReg =
B.buildConstant(
S32, 0).getReg(0);
1838 return {BaseReg, C1};
1844 LLT SrcTy =
MRI.getType(SrcReg);
1847 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1854 Register TmpReg0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1855 Register TmpReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1857 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1859 .addUse(SrcReg, 0, AMDGPU::sub0);
1860 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1862 .addUse(SrcReg, 0, AMDGPU::sub1);
1863 B.buildInstr(AMDGPU::REG_SEQUENCE)
1866 .addImm(AMDGPU::sub0)
1868 .addImm(AMDGPU::sub1);
1879 unsigned ConstOffset) {
1885 auto MaterializedOffset =
B.buildConstant(
S32, ConstOffset);
1887 auto Add =
B.buildAdd(
S32, WaterfallIdx, MaterializedOffset);
1888 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
1889 MRI.setRegBank(
Add.getReg(0), AMDGPU::SGPRRegBank);
1901 bool IsBooleanSrc =
false) {
1902 if (ExtOpc == AMDGPU::G_ZEXT) {
1903 B.buildConstant(Hi32Reg, 0);
1904 }
else if (ExtOpc == AMDGPU::G_SEXT) {
1908 B.buildCopy(Hi32Reg, Lo32Reg);
1912 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
1913 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
1916 assert(ExtOpc == AMDGPU::G_ANYEXT &&
"not an integer extension");
1917 B.buildUndef(Hi32Reg);
1921bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
1923 const OperandsMapper &OpdMapper)
const {
1930 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
1932 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
1934 LLT VecTy =
MRI.getType(VecReg);
1945 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
1947 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1950 (DstBank == AMDGPU::SGPRRegBank &&
1951 SrcBank == AMDGPU::SGPRRegBank &&
1952 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
1953 : AMDGPU::VCCRegBank;
1956 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
1957 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
1958 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
1963 unsigned NumLanes = DstRegs.size();
1967 EltTy =
MRI.getType(DstRegs[0]);
1969 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
1971 for (
unsigned L = 0;
L < NumLanes; ++
L)
1972 Res[L] = UnmergeToEltTy.getReg(L);
1974 for (
unsigned I = 1;
I < NumElem; ++
I) {
1975 auto IC =
B.buildConstant(
S32,
I);
1976 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
1978 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
1980 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1981 auto S =
B.buildSelect(EltTy, Cmp,
1982 UnmergeToEltTy.getReg(
I * NumLanes + L), Res[L]);
1984 for (
unsigned N : { 0, 2, 3 })
1985 MRI.setRegBank(S->getOperand(
N).getReg(), DstBank);
1987 Res[
L] = S->getOperand(0).getReg();
1991 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1992 Register DstReg = (NumLanes == 1) ?
MI.getOperand(0).getReg() : DstRegs[
L];
1993 B.buildCopy(DstReg, Res[L]);
1994 MRI.setRegBank(DstReg, DstBank);
1997 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
1998 MI.eraseFromParent();
2009 if (CurrBank && *CurrBank != Bank) {
2010 Register Copy =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
2011 MRI.setRegBank(Copy, Bank);
2015 MRI.setRegBank(Reg, Bank);
2019bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
2021 const OperandsMapper &OpdMapper)
const {
2028 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2030 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
2032 LLT VecTy =
MRI.getType(VecReg);
2043 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2045 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
2047 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
2050 (DstBank == AMDGPU::SGPRRegBank &&
2051 SrcBank == AMDGPU::SGPRRegBank &&
2052 InsBank == AMDGPU::SGPRRegBank &&
2053 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
2054 : AMDGPU::VCCRegBank;
2057 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
2058 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
2059 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
2064 unsigned NumLanes = InsRegs.size();
2067 InsRegs.push_back(
MI.getOperand(2).getReg());
2069 EltTy =
MRI.getType(InsRegs[0]);
2072 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
2075 for (
unsigned I = 0;
I < NumElem; ++
I) {
2076 auto IC =
B.buildConstant(
S32,
I);
2077 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2079 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2081 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2083 Register Op1 = UnmergeToEltTy.getReg(
I * NumLanes + L);
2094 if (MergeTy ==
MRI.getType(
MI.getOperand(0).getReg())) {
2095 B.buildBuildVector(
MI.getOperand(0), Ops);
2097 auto Vec =
B.buildBuildVector(MergeTy, Ops);
2098 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
2099 B.buildBitcast(
MI.getOperand(0).getReg(), Vec);
2102 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2103 MI.eraseFromParent();
2116 if (DefRegs.
empty()) {
2124 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2135 if (Src0Regs.
empty())
2140 if (Src1Regs.
empty())
2163 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2165 Register Hi =
B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
2166 Register MulLoHi =
B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);
2168 Register MulHiLo =
B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
2169 B.buildAdd(DefRegs[1],
Add, MulHiLo);
2170 B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);
2172 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2173 MI.eraseFromParent();
2179 B.setInstrAndDebugLoc(
MI);
2180 unsigned Opc =
MI.getOpcode();
2183 case AMDGPU::G_CONSTANT:
2184 case AMDGPU::G_IMPLICIT_DEF: {
2186 LLT DstTy =
MRI.getType(DstReg);
2192 if (DstBank == &AMDGPU::VCCRegBank)
2195 if (DefRegs.
empty())
2198 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2201 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2203 MI.getOperand(0).setReg(NewDstReg);
2204 if (Opc != AMDGPU::G_IMPLICIT_DEF) {
2205 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2206 MI.getOperand(1).setCImm(
2210 MRI.setRegBank(NewDstReg, *DstBank);
2211 B.buildTrunc(DefRegs[0], NewDstReg);
2214 case AMDGPU::G_PHI: {
2216 LLT DstTy =
MRI.getType(DstReg);
2223 if (DstBank == &AMDGPU::VCCRegBank) {
2230 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
2234 if (SrcBank != &AMDGPU::VCCRegBank) {
2239 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2240 MI.getOperand(
I).setReg(Copy.getReg(0));
2251 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2252 B.setInsertPt(
B.getMBB(),
MI);
2260 case AMDGPU::G_FCMP:
2264 case AMDGPU::G_ICMP:
2265 case AMDGPU::G_UADDO:
2266 case AMDGPU::G_USUBO:
2267 case AMDGPU::G_UADDE:
2268 case AMDGPU::G_SADDE:
2269 case AMDGPU::G_USUBE:
2270 case AMDGPU::G_SSUBE: {
2271 unsigned BoolDstOp =
2272 (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
2273 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2277 if (DstBank != &AMDGPU::SGPRRegBank)
2280 const bool HasCarryIn =
MI.getNumOperands() == 5;
2286 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2287 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2291 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2292 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2293 MI.getOperand(4).setReg(NewSrcReg);
2297 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2302 if (DefRegs.
empty())
2304 B.buildTrunc(DefRegs[0], NewDstReg);
2307 case AMDGPU::G_SELECT: {
2309 LLT DstTy =
MRI.getType(DstReg);
2312 if (CondRegs.
empty())
2319 if (CondBank == &AMDGPU::SGPRRegBank) {
2322 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2324 MI.getOperand(1).setReg(NewCondReg);
2325 B.buildZExt(NewCondReg, CondRegs[0]);
2338 if (DefRegs.
empty()) {
2343 if (Src1Regs.
empty())
2349 if (Src2Regs.
empty())
2356 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0]);
2357 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1]);
2359 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2360 MI.eraseFromParent();
2363 case AMDGPU::G_BRCOND: {
2364 Register CondReg =
MI.getOperand(0).getReg();
2369 if (CondBank == &AMDGPU::SGPRRegBank) {
2372 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2374 MI.getOperand(0).setReg(NewCondReg);
2375 B.buildZExt(NewCondReg, CondReg);
2383 case AMDGPU::G_XOR: {
2387 LLT DstTy =
MRI.getType(DstReg);
2392 if (DstBank == &AMDGPU::VCCRegBank)
2396 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2414 if (DefRegs.
empty()) {
2421 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2427 if (Src0Regs.
empty())
2432 if (Src1Regs.
empty())
2439 B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
2440 B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});
2442 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2443 MI.eraseFromParent();
2446 case AMDGPU::G_ABS: {
2452 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2454 ApplyRegBankMapping Apply(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2467 case AMDGPU::G_LSHR:
2468 case AMDGPU::G_ASHR:
2469 case AMDGPU::G_SMIN:
2470 case AMDGPU::G_SMAX:
2471 case AMDGPU::G_UMIN:
2472 case AMDGPU::G_UMAX: {
2474 LLT DstTy =
MRI.getType(DstReg);
2491 if (DstBank == &AMDGPU::VGPRRegBank)
2497 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2499 if (DstTy.
isVector() && Opc == AMDGPU::G_ABS) {
2502 std::tie(WideSrcLo, WideSrcHi) =
2504 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcLo});
2505 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcHi});
2506 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2507 MI.eraseFromParent();
2516 std::tie(WideSrc0Lo, WideSrc0Hi)
2518 std::tie(WideSrc1Lo, WideSrc1Hi)
2520 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2521 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2522 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2523 MI.eraseFromParent();
2531 if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
2532 Opc == AMDGPU::G_ASHR) {
2533 B.setInsertPt(*
MBB,
MI.getIterator());
2541 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
2542 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
2556 Register SrcReg0 =
MI.getOperand(1).getReg();
2557 Register SrcReg1 =
MI.getOperand(2).getReg();
2560 assert(
MRI.getType(DstReg) ==
S64 &&
"This is a special case for s_mul_u64 "
2561 "that handles only 64-bit operands.");
2567 if (DstBank == &AMDGPU::SGPRRegBank) {
2568 MI.setDesc(
TII->get(AMDGPU::S_MUL_U64));
2569 MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
2570 MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
2571 MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
2577 assert(
MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
2578 "The destination operand should be in vector registers.");
2583 Register Op0L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2584 MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
2586 B.buildTrunc(Op0L, SrcReg0);
2589 Register Op1L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2590 MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
2592 B.buildTrunc(Op1L, SrcReg1);
2594 unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
2595 ? AMDGPU::G_AMDGPU_MAD_U64_U32
2596 : AMDGPU::G_AMDGPU_MAD_I64_I32;
2600 MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
2601 Register CarryOut =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2602 MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
2603 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
2604 MI.eraseFromParent();
2607 case AMDGPU::G_SEXT_INREG: {
2609 if (SrcRegs.
empty())
2613 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2620 int Amt =
MI.getOperand(2).getImm();
2626 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2628 auto Freeze =
B.buildFreeze(
S32, SrcRegs[0]);
2630 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2633 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(
S32, 31));
2637 B.buildCopy(DstRegs[0], SrcRegs[0]);
2638 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2642 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2643 MI.eraseFromParent();
2646 case AMDGPU::G_CTPOP:
2647 case AMDGPU::G_BITREVERSE: {
2650 if (DstBank == &AMDGPU::SGPRRegBank)
2655 LLT Ty =
MRI.getType(SrcReg);
2659 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2668 case AMDGPU::G_AMDGPU_FFBH_U32:
2669 case AMDGPU::G_AMDGPU_FFBL_B32:
2670 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2671 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2674 if (DstBank == &AMDGPU::SGPRRegBank)
2679 LLT Ty =
MRI.getType(SrcReg);
2689 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2691 unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2692 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2693 : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2694 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2696 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2697 auto X =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx]});
2698 auto Y =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx ^ 1]});
2700 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2702 : AMDGPU::G_UADDSAT;
2703 Y =
B.buildInstr(AddOpc, {
S32}, {
Y,
B.buildConstant(
S32, 32)});
2705 B.buildUMin(DstReg,
X,
Y);
2706 MI.eraseFromParent();
2709 case AMDGPU::G_SEXT:
2710 case AMDGPU::G_ZEXT:
2711 case AMDGPU::G_ANYEXT: {
2713 LLT SrcTy =
MRI.getType(SrcReg);
2714 const bool Signed = Opc == AMDGPU::G_SEXT;
2722 LLT DstTy =
MRI.getType(DstReg);
2724 SrcBank != &AMDGPU::SGPRRegBank &&
2725 SrcBank != &AMDGPU::VCCRegBank &&
2735 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2736 }
else if (Opc == AMDGPU::G_ZEXT) {
2737 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2739 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2743 MRI.setRegBank(DstReg, *SrcBank);
2744 MI.eraseFromParent();
2754 if (SrcBank == &AMDGPU::VCCRegBank) {
2761 const bool UseSel64 = DstSize > 32 &&
2762 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2766 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2767 auto False =
B.buildConstant(SelType, 0);
2769 MRI.setRegBank(True.getReg(0), *DstBank);
2770 MRI.setRegBank(False.getReg(0), *DstBank);
2771 MRI.setRegBank(DstReg, *DstBank);
2774 B.buildSelect(DefRegs[0], SrcReg, True, False);
2776 }
else if (DstSize < 32) {
2777 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2778 MRI.setRegBank(Sel.getReg(0), *DstBank);
2779 B.buildTrunc(DstReg, Sel);
2781 B.buildSelect(DstReg, SrcReg, True, False);
2784 MI.eraseFromParent();
2790 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2799 LLT DstTy =
MRI.getType(DstReg);
2800 LLT SrcTy =
MRI.getType(SrcReg);
2802 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2814 unsigned ConstOffset;
2815 std::tie(BaseIdxReg, ConstOffset) =
2822 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2827 if (ShouldMoveIndexIntoLoop)
2828 MI.getOperand(2).setReg(BaseIdxReg);
2834 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2835 SrcBank == &AMDGPU::SGPRRegBank;
2836 if (DstRegs.
empty()) {
2841 if (NeedCopyToVGPR) {
2843 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2844 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2845 MI.getOperand(0).setReg(TmpReg);
2846 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2853 if (ShouldMoveIndexIntoLoop)
2863 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2864 auto One =
B.buildConstant(
S32, 1);
2875 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2876 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2878 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2879 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2881 MRI.setRegBank(DstReg, *DstBank);
2882 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2883 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2884 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2885 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2889 MI.eraseFromParent();
2895 B.setInstr(*Span.
begin());
2896 MI.eraseFromParent();
2900 if (NeedCopyToVGPR) {
2904 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2905 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2907 Extract0->getOperand(0).setReg(TmpReg0);
2908 Extract1->getOperand(0).setReg(TmpReg1);
2916 if (ShouldMoveIndexIntoLoop)
2921 case AMDGPU::G_INSERT_VECTOR_ELT: {
2925 LLT VecTy =
MRI.getType(DstReg);
2931 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2933 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2941 LLT InsTy =
MRI.getType(InsReg);
2945 unsigned ConstOffset;
2946 std::tie(BaseIdxReg, ConstOffset) =
2953 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2958 if (ShouldMoveIndexIntoLoop)
2959 MI.getOperand(3).setReg(BaseIdxReg);
2962 if (InsRegs.
empty()) {
2966 if (ShouldMoveIndexIntoLoop) {
2978 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2979 auto One =
B.buildConstant(
S32, 1);
2988 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2989 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2991 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
2992 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3001 MRI.setRegBank(InsReg, *InsSrcBank);
3002 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
3003 MRI.setRegBank(InsLo.getReg(0), *DstBank);
3004 MRI.setRegBank(InsHi.getReg(0), *DstBank);
3005 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3006 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3007 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3012 B.setInsertPt(
B.getMBB(),
MI);
3013 B.buildBitcast(DstReg, InsHi);
3014 MI.eraseFromParent();
3018 B.setInstr(*Span.
begin());
3019 MI.eraseFromParent();
3030 B.buildBitcast(DstReg, InsHi);
3033 if (ShouldMoveIndexIntoLoop)
3038 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3039 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3040 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3041 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3042 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3043 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3044 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3045 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3046 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3047 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
3048 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3049 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3050 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3051 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3052 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3053 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3054 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3055 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3056 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3057 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3058 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3059 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3064 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3065 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3066 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3067 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3068 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3069 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3070 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3071 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3072 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3073 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3074 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3075 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
3080 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3081 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3082 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3087 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3092 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3093 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3094 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3095 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3096 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3100 case AMDGPU::G_INTRINSIC:
3101 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3103 case Intrinsic::amdgcn_readlane: {
3114 case Intrinsic::amdgcn_writelane: {
3124 case Intrinsic::amdgcn_interp_p1:
3125 case Intrinsic::amdgcn_interp_p2:
3126 case Intrinsic::amdgcn_interp_mov:
3127 case Intrinsic::amdgcn_interp_p1_f16:
3128 case Intrinsic::amdgcn_interp_p2_f16:
3129 case Intrinsic::amdgcn_lds_param_load: {
3137 case Intrinsic::amdgcn_interp_inreg_p10:
3138 case Intrinsic::amdgcn_interp_inreg_p2:
3139 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3140 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3141 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3142 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3145 case Intrinsic::amdgcn_permlane16:
3146 case Intrinsic::amdgcn_permlanex16: {
3154 case Intrinsic::amdgcn_sbfe:
3157 case Intrinsic::amdgcn_ubfe:
3160 case Intrinsic::amdgcn_inverse_ballot:
3161 case Intrinsic::amdgcn_s_bitreplicate:
3162 case Intrinsic::amdgcn_s_quadmask:
3163 case Intrinsic::amdgcn_s_wqm:
3167 case Intrinsic::amdgcn_ballot:
3173 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3174 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3175 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3176 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3186 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
3187 unsigned N =
MI.getNumExplicitOperands() - 2;
3192 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3193 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3194 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
3196 case Intrinsic::amdgcn_ds_ordered_add:
3197 case Intrinsic::amdgcn_ds_ordered_swap: {
3204 case Intrinsic::amdgcn_ds_gws_init:
3205 case Intrinsic::amdgcn_ds_gws_barrier:
3206 case Intrinsic::amdgcn_ds_gws_sema_br: {
3212 case Intrinsic::amdgcn_ds_gws_sema_v:
3213 case Intrinsic::amdgcn_ds_gws_sema_p:
3214 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3219 case Intrinsic::amdgcn_ds_append:
3220 case Intrinsic::amdgcn_ds_consume: {
3224 case Intrinsic::amdgcn_s_sendmsg:
3225 case Intrinsic::amdgcn_s_sendmsghalt: {
3230 case Intrinsic::amdgcn_s_setreg: {
3234 case Intrinsic::amdgcn_s_ttracedata:
3237 case Intrinsic::amdgcn_raw_buffer_load_lds:
3238 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3245 case Intrinsic::amdgcn_struct_buffer_load_lds:
3246 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3253 case Intrinsic::amdgcn_global_load_lds: {
3258 case Intrinsic::amdgcn_lds_direct_load: {
3264 case Intrinsic::amdgcn_exp_row:
3268 case Intrinsic::amdgcn_s_sleep_var:
3272 case Intrinsic::amdgcn_s_barrier_signal_var:
3273 case Intrinsic::amdgcn_s_barrier_join:
3274 case Intrinsic::amdgcn_s_wakeup_barrier:
3277 case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
3280 case Intrinsic::amdgcn_s_barrier_init:
3284 case Intrinsic::amdgcn_s_get_barrier_state: {
3294 if (RSrcIntrin->IsImage) {
3305 case AMDGPU::G_SI_CALL: {
3316 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3317 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3323 unsigned NonCopyInstrsLen = 0;
3329 while (Start->getOpcode() != FrameSetupOpcode) {
3331 bool IsCopy =
false;
3332 if (Start->getOpcode() == AMDGPU::COPY) {
3333 auto &Dst = Start->getOperand(0);
3336 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3341 auto &Src = Start->getOperand(1);
3344 IsCopy =
Info->getScratchRSrcReg() == Reg;
3352 NonCopyInstrsLen = NonCopyInstrs.
size();
3357 NonCopyInstrs.
resize(NonCopyInstrsLen);
3359 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3365 NonCopyInstrs.
clear();
3366 NonCopyInstrsLen = 0;
3369 while (
End->getOpcode() != FrameDestroyOpcode) {
3371 bool IsCopy =
false;
3372 if (
End->getOpcode() == AMDGPU::COPY) {
3373 auto &Src =
End->getOperand(1);
3376 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3382 NonCopyInstrsLen = NonCopyInstrs.
size();
3387 NonCopyInstrs.
resize(NonCopyInstrsLen);
3391 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3396 B.setInsertPt(
B.getMBB(), Start);
3400 case AMDGPU::G_LOAD:
3401 case AMDGPU::G_ZEXTLOAD:
3402 case AMDGPU::G_SEXTLOAD: {
3407 case AMDGPU::G_DYN_STACKALLOC:
3410 case AMDGPU::G_STACKRESTORE: {
3415 case AMDGPU::G_SBFX:
3418 case AMDGPU::G_UBFX:
3421 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3422 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3425 case AMDGPU::G_PREFETCH: {
3427 MI.eraseFromParent();
3432 if (PtrBank == AMDGPU::VGPRRegBankID) {
3433 MI.eraseFromParent();
3436 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3439 MI.eraseFromParent();
3457 if (RB0 == AMDGPU::InvalidRegBankID)
3459 if (RB1 == AMDGPU::InvalidRegBankID)
3462 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3463 return AMDGPU::SGPRRegBankID;
3465 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3466 return AMDGPU::AGPRRegBankID;
3468 return AMDGPU::VGPRRegBankID;
3472 if (RB0 == AMDGPU::InvalidRegBankID)
3474 if (RB1 == AMDGPU::InvalidRegBankID)
3480 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3481 return AMDGPU::VCCRegBankID;
3489 unsigned RegBank = AMDGPU::InvalidRegBankID;
3497 if (RegBank == AMDGPU::VGPRRegBankID)
3513 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3526 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3532 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3535 MI.getNumOperands());
3548 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3554 unsigned BankID =
Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3555 OpdsMapping[i] = AMDGPU::getValueMapping(BankID,
Size);
3559 MI.getNumOperands());
3568 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
3574 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3578 MI.getNumOperands());
3584 int RsrcIdx)
const {
3587 RsrcIdx +=
MI.getNumExplicitDefs() + 1;
3589 const int NumOps =
MI.getNumOperands();
3594 for (
int I = 0;
I != NumOps; ++
I) {
3595 if (!
MI.getOperand(
I).isReg())
3609 const bool MustBeSGPR =
I == RsrcIdx ||
I == RsrcIdx + 1;
3614 OpdsMapping[
I] = AMDGPU::getValueMapping(NewBank,
Size);
3617 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3628 LLT PtrTy =
MRI.getType(PtrReg);
3632 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3637 return AMDGPU::getValueMapping(PtrBank->
getID(),
Size);
3648 LLT PtrTy =
MRI.getType(PtrReg);
3660 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3661 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3663 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3668 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3670 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3673 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3674 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3677 OpdsMapping[0] = ValMapping;
3678 OpdsMapping[1] = PtrMapping;
3703 return AMDGPU::getValueMapping(Bank,
Size);
3711 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3719 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID,
Size);
3736 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3743 assert(SrcBank &&
"src bank should have been assigned already");
3748 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3753 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3755 OpdsMapping[0] = &ValMap;
3756 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3757 OpdsMapping[1] = &ValMap;
3764 if (
MI.isRegSequence()) {
3767 unsigned BankID = AMDGPU::SGPRRegBankID;
3769 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
3773 if (OpBank != AMDGPU::SGPRRegBankID) {
3774 BankID = AMDGPU::VGPRRegBankID;
3790 if (
auto *
PHI = dyn_cast<GPhi>(&
MI)) {
3791 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3796 ResultBank = DstBank->
getID();
3798 for (
unsigned I = 0;
I <
PHI->getNumIncomingValues(); ++
I) {
3803 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3804 ResultBank = AMDGPU::VGPRRegBankID;
3809 unsigned OpBank = Bank->
getID();
3813 assert(ResultBank != AMDGPU::InvalidRegBankID);
3815 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3830 switch (
MI.getOpcode()) {
3837 case AMDGPU::G_MUL: {
3838 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3843 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3844 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3845 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3847 TargetBankID = DstBank->
getID();
3848 if (DstBank == &AMDGPU::VCCRegBank) {
3849 TargetBankID = AMDGPU::VCCRegBankID;
3850 BankLHS = AMDGPU::VCCRegBankID;
3851 BankRHS = AMDGPU::VCCRegBankID;
3854 AMDGPU::SGPRRegBankID);
3856 AMDGPU::SGPRRegBankID);
3860 AMDGPU::VCCRegBankID);
3862 AMDGPU::VCCRegBankID);
3865 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3866 TargetBankID = AMDGPU::VGPRRegBankID;
3867 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3868 TargetBankID = AMDGPU::VCCRegBankID;
3869 BankLHS = AMDGPU::VCCRegBankID;
3870 BankRHS = AMDGPU::VCCRegBankID;
3871 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3872 TargetBankID = AMDGPU::SGPRRegBankID;
3876 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3877 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3878 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
3885 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
3886 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
3888 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
3890 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
3893 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
3901 case AMDGPU::G_PTR_ADD:
3902 case AMDGPU::G_PTRMASK:
3906 case AMDGPU::G_LSHR:
3907 case AMDGPU::G_ASHR:
3908 case AMDGPU::G_UADDO:
3909 case AMDGPU::G_USUBO:
3910 case AMDGPU::G_UADDE:
3911 case AMDGPU::G_SADDE:
3912 case AMDGPU::G_USUBE:
3913 case AMDGPU::G_SSUBE:
3914 case AMDGPU::G_SMIN:
3915 case AMDGPU::G_SMAX:
3916 case AMDGPU::G_UMIN:
3917 case AMDGPU::G_UMAX:
3919 case AMDGPU::G_SHUFFLE_VECTOR:
3920 case AMDGPU::G_SBFX:
3921 case AMDGPU::G_UBFX:
3922 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
3923 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
3927 case AMDGPU::G_FADD:
3928 case AMDGPU::G_FSUB:
3929 case AMDGPU::G_FMUL:
3931 case AMDGPU::G_FFLOOR:
3932 case AMDGPU::G_FCEIL:
3933 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
3934 case AMDGPU::G_FMINNUM:
3935 case AMDGPU::G_FMAXNUM:
3936 case AMDGPU::G_FMINIMUM:
3937 case AMDGPU::G_FMAXIMUM:
3938 case AMDGPU::G_INTRINSIC_TRUNC:
3939 case AMDGPU::G_STRICT_FADD:
3940 case AMDGPU::G_STRICT_FSUB:
3941 case AMDGPU::G_STRICT_FMUL:
3942 case AMDGPU::G_STRICT_FMA: {
3943 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
3950 case AMDGPU::G_FPTOSI:
3951 case AMDGPU::G_FPTOUI:
3952 case AMDGPU::G_SITOFP:
3953 case AMDGPU::G_UITOFP: {
3954 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3955 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3961 case AMDGPU::G_FPTRUNC:
3962 case AMDGPU::G_FPEXT: {
3963 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3964 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3970 case AMDGPU::G_FSQRT:
3971 case AMDGPU::G_FEXP2:
3972 case AMDGPU::G_FLOG2: {
3973 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3979 case AMDGPU::G_SADDSAT:
3980 case AMDGPU::G_SSUBSAT:
3981 case AMDGPU::G_UADDSAT:
3982 case AMDGPU::G_USUBSAT:
3983 case AMDGPU::G_FMAD:
3984 case AMDGPU::G_FLDEXP:
3985 case AMDGPU::G_FMINNUM_IEEE:
3986 case AMDGPU::G_FMAXNUM_IEEE:
3987 case AMDGPU::G_FCANONICALIZE:
3988 case AMDGPU::G_STRICT_FLDEXP:
3989 case AMDGPU::G_BSWAP:
3990 case AMDGPU::G_FSHR:
3991 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
3992 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
3993 case AMDGPU::G_AMDGPU_RCP_IFLAG:
3994 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
3995 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
3996 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
3997 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
3998 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
3999 case AMDGPU::G_AMDGPU_SMED3:
4000 case AMDGPU::G_AMDGPU_FMED3:
4002 case AMDGPU::G_UMULH:
4003 case AMDGPU::G_SMULH: {
4008 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4009 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4018 bool AllSalu =
true;
4019 bool MulSalu =
true;
4020 for (
unsigned i = 0; i < 5; ++i) {
4023 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4025 if (i == 2 || i == 3) {
4043 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4044 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4045 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4046 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4047 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4050 case AMDGPU::G_IMPLICIT_DEF: {
4051 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4052 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4055 case AMDGPU::G_FCONSTANT:
4056 case AMDGPU::G_CONSTANT:
4057 case AMDGPU::G_GLOBAL_VALUE:
4058 case AMDGPU::G_BLOCK_ADDR:
4059 case AMDGPU::G_READSTEADYCOUNTER:
4060 case AMDGPU::G_READCYCLECOUNTER: {
4061 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4062 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4065 case AMDGPU::G_FRAME_INDEX: {
4068 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4069 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4072 case AMDGPU::G_DYN_STACKALLOC: {
4074 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4076 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4079 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4084 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4085 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4088 case AMDGPU::G_INSERT: {
4093 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4094 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4095 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4096 OpdsMapping[3] =
nullptr;
4099 case AMDGPU::G_EXTRACT: {
4103 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4104 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4105 OpdsMapping[2] =
nullptr;
4108 case AMDGPU::G_BUILD_VECTOR:
4109 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4110 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
4113 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4116 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
4118 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4119 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4120 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4126 case AMDGPU::G_MERGE_VALUES:
4127 case AMDGPU::G_CONCAT_VECTORS: {
4129 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4130 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4132 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4134 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
4135 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4138 case AMDGPU::G_BITREVERSE:
4139 case AMDGPU::G_BITCAST:
4140 case AMDGPU::G_INTTOPTR:
4141 case AMDGPU::G_PTRTOINT:
4142 case AMDGPU::G_FABS:
4143 case AMDGPU::G_FNEG: {
4144 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4146 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4149 case AMDGPU::G_AMDGPU_FFBH_U32:
4150 case AMDGPU::G_AMDGPU_FFBL_B32:
4151 case AMDGPU::G_CTLZ_ZERO_UNDEF:
4152 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
4153 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4155 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4156 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
4159 case AMDGPU::G_CTPOP: {
4160 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4162 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4167 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4170 case AMDGPU::G_TRUNC: {
4176 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4177 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4180 case AMDGPU::G_ZEXT:
4181 case AMDGPU::G_SEXT:
4182 case AMDGPU::G_ANYEXT:
4183 case AMDGPU::G_SEXT_INREG: {
4192 switch (SrcBank->
getID()) {
4193 case AMDGPU::SGPRRegBankID:
4194 DstBank = AMDGPU::SGPRRegBankID;
4197 DstBank = AMDGPU::VGPRRegBankID;
4203 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4204 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
4208 case AMDGPU::G_IS_FPCLASS: {
4210 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4211 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4212 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4213 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4216 case AMDGPU::G_STORE: {
4218 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4223 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4224 OpdsMapping[0] = ValMapping;
4228 case AMDGPU::G_ICMP:
4229 case AMDGPU::G_FCMP: {
4230 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4235 AMDGPU::SGPRRegBankID);
4239 auto canUseSCCICMP = [&]() {
4242 return Size == 32 ||
4247 auto canUseSCCFCMP = [&]() {
4251 bool isICMP =
MI.getOpcode() == AMDGPU::G_ICMP;
4252 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4253 Op2Bank == AMDGPU::SGPRRegBankID &&
4254 Op3Bank == AMDGPU::SGPRRegBankID &&
4255 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4257 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4258 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4262 const unsigned ResultSize = 1;
4264 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4265 OpdsMapping[1] =
nullptr;
4266 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4267 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4270 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4273 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4274 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4275 unsigned IdxSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4277 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4279 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4280 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4283 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4286 case AMDGPU::G_INSERT_VECTOR_ELT: {
4288 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4290 unsigned VecSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4291 unsigned InsertSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4292 unsigned IdxSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4296 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4297 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4301 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4302 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4305 assert(InsertSize == 32 || InsertSize == 64);
4306 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4310 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4313 case AMDGPU::G_UNMERGE_VALUES: {
4318 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4320 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4324 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4325 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4326 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4327 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4328 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4329 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4330 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4331 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4332 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4333 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
4334 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4335 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4336 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4337 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4338 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4339 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4340 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4341 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4342 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4343 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4344 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4345 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4364 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4365 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4366 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4367 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4368 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4369 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4370 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4371 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4372 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4373 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4374 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4375 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4376 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4377 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4378 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4401 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4427 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4428 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4429 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4430 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4431 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4439 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4440 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4441 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4443 unsigned Size0 =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4444 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4447 case AMDGPU::G_INTRINSIC:
4448 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4452 case Intrinsic::amdgcn_div_fmas:
4453 case Intrinsic::amdgcn_div_fixup:
4454 case Intrinsic::amdgcn_trig_preop:
4455 case Intrinsic::amdgcn_sin:
4456 case Intrinsic::amdgcn_cos:
4457 case Intrinsic::amdgcn_log_clamp:
4458 case Intrinsic::amdgcn_rcp_legacy:
4459 case Intrinsic::amdgcn_rsq_legacy:
4460 case Intrinsic::amdgcn_rsq_clamp:
4461 case Intrinsic::amdgcn_fmul_legacy:
4462 case Intrinsic::amdgcn_fma_legacy:
4463 case Intrinsic::amdgcn_frexp_mant:
4464 case Intrinsic::amdgcn_frexp_exp:
4465 case Intrinsic::amdgcn_fract:
4466 case Intrinsic::amdgcn_cvt_pknorm_i16:
4467 case Intrinsic::amdgcn_cvt_pknorm_u16:
4468 case Intrinsic::amdgcn_cvt_pk_i16:
4469 case Intrinsic::amdgcn_cvt_pk_u16:
4470 case Intrinsic::amdgcn_fmed3:
4471 case Intrinsic::amdgcn_cubeid:
4472 case Intrinsic::amdgcn_cubema:
4473 case Intrinsic::amdgcn_cubesc:
4474 case Intrinsic::amdgcn_cubetc:
4475 case Intrinsic::amdgcn_sffbh:
4476 case Intrinsic::amdgcn_fmad_ftz:
4477 case Intrinsic::amdgcn_mbcnt_lo:
4478 case Intrinsic::amdgcn_mbcnt_hi:
4479 case Intrinsic::amdgcn_mul_u24:
4480 case Intrinsic::amdgcn_mul_i24:
4481 case Intrinsic::amdgcn_mulhi_u24:
4482 case Intrinsic::amdgcn_mulhi_i24:
4483 case Intrinsic::amdgcn_lerp:
4484 case Intrinsic::amdgcn_sad_u8:
4485 case Intrinsic::amdgcn_msad_u8:
4486 case Intrinsic::amdgcn_sad_hi_u8:
4487 case Intrinsic::amdgcn_sad_u16:
4488 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4489 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4490 case Intrinsic::amdgcn_mqsad_u32_u8:
4491 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4492 case Intrinsic::amdgcn_alignbyte:
4493 case Intrinsic::amdgcn_perm:
4494 case Intrinsic::amdgcn_fdot2:
4495 case Intrinsic::amdgcn_sdot2:
4496 case Intrinsic::amdgcn_udot2:
4497 case Intrinsic::amdgcn_sdot4:
4498 case Intrinsic::amdgcn_udot4:
4499 case Intrinsic::amdgcn_sdot8:
4500 case Intrinsic::amdgcn_udot8:
4501 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4502 case Intrinsic::amdgcn_fdot2_f16_f16:
4503 case Intrinsic::amdgcn_fdot2_f32_bf16:
4504 case Intrinsic::amdgcn_sudot4:
4505 case Intrinsic::amdgcn_sudot8:
4506 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4507 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4508 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4509 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4510 case Intrinsic::amdgcn_cvt_f32_fp8:
4511 case Intrinsic::amdgcn_cvt_f32_bf8:
4512 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4513 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4514 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4515 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4516 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4517 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4518 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4519 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4520 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4521 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4522 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4523 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4524 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4525 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4526 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4527 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4528 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4529 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4530 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4531 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4532 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4533 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4534 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4535 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4536 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4537 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4538 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4539 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4540 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4541 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4543 case Intrinsic::amdgcn_log:
4544 case Intrinsic::amdgcn_exp2:
4545 case Intrinsic::amdgcn_rcp:
4546 case Intrinsic::amdgcn_rsq:
4547 case Intrinsic::amdgcn_sqrt: {
4548 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4554 case Intrinsic::amdgcn_sbfe:
4555 case Intrinsic::amdgcn_ubfe:
4559 case Intrinsic::amdgcn_ds_swizzle:
4560 case Intrinsic::amdgcn_ds_permute:
4561 case Intrinsic::amdgcn_ds_bpermute:
4562 case Intrinsic::amdgcn_update_dpp:
4563 case Intrinsic::amdgcn_mov_dpp8:
4564 case Intrinsic::amdgcn_mov_dpp:
4565 case Intrinsic::amdgcn_strict_wwm:
4566 case Intrinsic::amdgcn_wwm:
4567 case Intrinsic::amdgcn_strict_wqm:
4568 case Intrinsic::amdgcn_wqm:
4569 case Intrinsic::amdgcn_softwqm:
4570 case Intrinsic::amdgcn_set_inactive:
4571 case Intrinsic::amdgcn_set_inactive_chain_arg:
4572 case Intrinsic::amdgcn_permlane64:
4574 case Intrinsic::amdgcn_cvt_pkrtz:
4578 case Intrinsic::amdgcn_kernarg_segment_ptr:
4579 case Intrinsic::amdgcn_s_getpc:
4580 case Intrinsic::amdgcn_groupstaticsize:
4581 case Intrinsic::amdgcn_reloc_constant:
4582 case Intrinsic::returnaddress: {
4583 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4584 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4587 case Intrinsic::amdgcn_wqm_vote: {
4588 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4589 OpdsMapping[0] = OpdsMapping[2]
4590 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4593 case Intrinsic::amdgcn_ps_live: {
4594 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4597 case Intrinsic::amdgcn_div_scale: {
4598 unsigned Dst0Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4599 unsigned Dst1Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4600 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4601 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4603 unsigned SrcSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4604 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4605 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4608 case Intrinsic::amdgcn_class: {
4609 Register Src0Reg =
MI.getOperand(2).getReg();
4610 Register Src1Reg =
MI.getOperand(3).getReg();
4611 unsigned Src0Size =
MRI.getType(Src0Reg).getSizeInBits();
4612 unsigned Src1Size =
MRI.getType(Src1Reg).getSizeInBits();
4613 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4614 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4615 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4616 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4619 case Intrinsic::amdgcn_icmp:
4620 case Intrinsic::amdgcn_fcmp: {
4621 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4623 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4624 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4625 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4626 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4629 case Intrinsic::amdgcn_readlane: {
4632 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4634 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4637 case Intrinsic::amdgcn_readfirstlane: {
4638 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4639 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4640 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4641 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4644 case Intrinsic::amdgcn_writelane: {
4645 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4647 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4650 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4652 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4656 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4657 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4658 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4661 case Intrinsic::amdgcn_if_break: {
4663 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4664 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4665 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4668 case Intrinsic::amdgcn_permlane16:
4669 case Intrinsic::amdgcn_permlanex16: {
4671 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4672 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4673 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4678 case Intrinsic::amdgcn_permlane16_var:
4679 case Intrinsic::amdgcn_permlanex16_var: {
4681 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4682 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4683 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4684 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4687 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
4688 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
4689 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
4690 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
4691 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
4692 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
4693 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
4694 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
4695 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
4696 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
4697 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
4698 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
4699 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
4700 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
4701 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
4702 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
4703 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
4704 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
4705 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
4706 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
4707 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
4708 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
4709 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
4710 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
4711 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
4712 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
4713 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
4714 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
4715 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
4716 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
4717 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
4718 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
4719 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
4720 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
4721 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
4722 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
4723 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
4724 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
4725 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: {
4734 Info->mayNeedAGPRs()
4740 Info->mayNeedAGPRs()
4745 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
4746 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
4747 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
4748 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
4749 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
4750 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
4751 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
4752 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
4753 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
4754 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
4755 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
4756 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
4757 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
4758 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: {
4767 case Intrinsic::amdgcn_interp_p1:
4768 case Intrinsic::amdgcn_interp_p2:
4769 case Intrinsic::amdgcn_interp_mov:
4770 case Intrinsic::amdgcn_interp_p1_f16:
4771 case Intrinsic::amdgcn_interp_p2_f16:
4772 case Intrinsic::amdgcn_lds_param_load: {
4773 const int M0Idx =
MI.getNumOperands() - 1;
4774 Register M0Reg =
MI.getOperand(M0Idx).getReg();
4776 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4778 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4779 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
4780 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4784 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
4787 case Intrinsic::amdgcn_interp_inreg_p10:
4788 case Intrinsic::amdgcn_interp_inreg_p2:
4789 case Intrinsic::amdgcn_interp_inreg_p10_f16:
4790 case Intrinsic::amdgcn_interp_inreg_p2_f16:
4791 case Intrinsic::amdgcn_interp_p10_rtz_f16:
4792 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
4793 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4794 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4795 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4796 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4797 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4800 case Intrinsic::amdgcn_ballot: {
4801 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4802 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4803 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4804 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
4807 case Intrinsic::amdgcn_inverse_ballot: {
4809 Register MaskReg =
MI.getOperand(2).getReg();
4810 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
4811 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4812 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4813 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4816 case Intrinsic::amdgcn_s_quadmask:
4817 case Intrinsic::amdgcn_s_wqm: {
4818 Register MaskReg =
MI.getOperand(2).getReg();
4819 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
4820 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4821 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
4822 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4825 case Intrinsic::amdgcn_wave_reduce_umin:
4826 case Intrinsic::amdgcn_wave_reduce_umax: {
4827 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4828 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4829 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4832 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
4835 case Intrinsic::amdgcn_s_bitreplicate:
4836 Register MaskReg =
MI.getOperand(2).getReg();
4837 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4838 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
4839 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
4843 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4844 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4845 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4846 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4849 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
4856 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
4857 unsigned N =
MI.getNumExplicitOperands() - 2;
4858 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
4862 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4865 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4868 for (
unsigned I = 2;
I <
N; ++
I) {
4869 unsigned Size =
MRI.getType(
MI.getOperand(
I).getReg()).getSizeInBits();
4870 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4875 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
4876 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
4877 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
4879 case Intrinsic::amdgcn_s_getreg:
4880 case Intrinsic::amdgcn_s_memtime:
4881 case Intrinsic::amdgcn_s_memrealtime:
4882 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
4883 case Intrinsic::amdgcn_s_sendmsg_rtn: {
4884 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4885 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4888 case Intrinsic::amdgcn_global_atomic_fadd:
4889 case Intrinsic::amdgcn_global_atomic_csub:
4890 case Intrinsic::amdgcn_global_atomic_fmin:
4891 case Intrinsic::amdgcn_global_atomic_fmax:
4892 case Intrinsic::amdgcn_global_atomic_fmin_num:
4893 case Intrinsic::amdgcn_global_atomic_fmax_num:
4894 case Intrinsic::amdgcn_flat_atomic_fadd:
4895 case Intrinsic::amdgcn_flat_atomic_fmin:
4896 case Intrinsic::amdgcn_flat_atomic_fmax:
4897 case Intrinsic::amdgcn_flat_atomic_fmin_num:
4898 case Intrinsic::amdgcn_flat_atomic_fmax_num:
4899 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
4900 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
4901 case Intrinsic::amdgcn_atomic_cond_sub_u32:
4902 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
4903 case Intrinsic::amdgcn_global_load_tr_b64:
4904 case Intrinsic::amdgcn_global_load_tr_b128:
4906 case Intrinsic::amdgcn_ds_ordered_add:
4907 case Intrinsic::amdgcn_ds_ordered_swap: {
4908 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4909 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4911 AMDGPU::SGPRRegBankID);
4912 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
4913 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4916 case Intrinsic::amdgcn_ds_append:
4917 case Intrinsic::amdgcn_ds_consume: {
4918 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4919 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4923 case Intrinsic::amdgcn_exp_compr:
4924 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4925 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4927 case Intrinsic::amdgcn_exp:
4929 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4930 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4931 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4932 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4934 case Intrinsic::amdgcn_exp_row:
4935 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4936 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4937 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4938 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4941 case Intrinsic::amdgcn_s_sendmsg:
4942 case Intrinsic::amdgcn_s_sendmsghalt: {
4945 AMDGPU::SGPRRegBankID);
4946 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4949 case Intrinsic::amdgcn_s_setreg: {
4952 AMDGPU::SGPRRegBankID);
4953 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4956 case Intrinsic::amdgcn_s_ttracedata: {
4960 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
4963 case Intrinsic::amdgcn_end_cf: {
4965 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4968 case Intrinsic::amdgcn_else: {
4970 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4971 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4972 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4975 case Intrinsic::amdgcn_live_mask: {
4976 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4979 case Intrinsic::amdgcn_wqm_demote:
4980 case Intrinsic::amdgcn_kill: {
4981 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4984 case Intrinsic::amdgcn_raw_buffer_load:
4985 case Intrinsic::amdgcn_raw_ptr_buffer_load:
4986 case Intrinsic::amdgcn_raw_tbuffer_load:
4987 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
4996 case Intrinsic::amdgcn_raw_buffer_load_lds:
4997 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
5004 case Intrinsic::amdgcn_raw_buffer_store:
5005 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5006 case Intrinsic::amdgcn_raw_buffer_store_format:
5007 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5008 case Intrinsic::amdgcn_raw_tbuffer_store:
5009 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5016 case Intrinsic::amdgcn_struct_buffer_load:
5017 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5018 case Intrinsic::amdgcn_struct_tbuffer_load:
5019 case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
5027 case Intrinsic::amdgcn_struct_buffer_load_lds:
5028 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
5036 case Intrinsic::amdgcn_struct_buffer_store:
5037 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5038 case Intrinsic::amdgcn_struct_tbuffer_store:
5039 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5047 case Intrinsic::amdgcn_init_exec_from_input: {
5049 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5052 case Intrinsic::amdgcn_ds_gws_init:
5053 case Intrinsic::amdgcn_ds_gws_barrier:
5054 case Intrinsic::amdgcn_ds_gws_sema_br: {
5055 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5059 AMDGPU::SGPRRegBankID);
5060 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5063 case Intrinsic::amdgcn_ds_gws_sema_v:
5064 case Intrinsic::amdgcn_ds_gws_sema_p:
5065 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5068 AMDGPU::SGPRRegBankID);
5069 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5072 case Intrinsic::amdgcn_global_load_lds: {
5077 case Intrinsic::amdgcn_lds_direct_load: {
5078 const int M0Idx =
MI.getNumOperands() - 1;
5079 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5081 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5083 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5084 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5085 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5089 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5092 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5093 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5097 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
5110 case Intrinsic::amdgcn_s_sleep_var:
5113 case Intrinsic::amdgcn_s_barrier_signal_var:
5114 case Intrinsic::amdgcn_s_barrier_join:
5115 case Intrinsic::amdgcn_s_wakeup_barrier:
5118 case Intrinsic::amdgcn_s_barrier_init:
5122 case Intrinsic::amdgcn_s_barrier_signal_isfirst_var: {
5123 const unsigned ResultSize = 1;
5125 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5129 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
5130 case Intrinsic::amdgcn_s_barrier_leave: {
5131 const unsigned ResultSize = 1;
5133 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5136 case Intrinsic::amdgcn_s_get_barrier_state: {
5141 case Intrinsic::amdgcn_pops_exiting_wave_id:
5148 case AMDGPU::G_SELECT: {
5149 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5151 AMDGPU::SGPRRegBankID);
5153 AMDGPU::SGPRRegBankID);
5154 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5155 Op3Bank == AMDGPU::SGPRRegBankID;
5157 unsigned CondBankDefault = SGPRSrcs ?
5158 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5161 if (CondBank == AMDGPU::SGPRRegBankID)
5162 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5163 else if (CondBank == AMDGPU::VGPRRegBankID)
5164 CondBank = AMDGPU::VCCRegBankID;
5166 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5167 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5169 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5173 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5174 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5175 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5176 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5178 OpdsMapping[0] = AMDGPU::getValueMapping(Bank,
Size);
5179 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5180 OpdsMapping[2] = AMDGPU::getValueMapping(Bank,
Size);
5181 OpdsMapping[3] = AMDGPU::getValueMapping(Bank,
Size);
5187 case AMDGPU::G_SI_CALL: {
5188 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5194 for (
unsigned I = 4;
I <
MI.getNumOperands(); ++
I) {
5195 if (
MI.getOperand(
I).isReg()) {
5199 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5204 case AMDGPU::G_LOAD:
5205 case AMDGPU::G_ZEXTLOAD:
5206 case AMDGPU::G_SEXTLOAD:
5209 case AMDGPU::G_ATOMICRMW_XCHG:
5210 case AMDGPU::G_ATOMICRMW_ADD:
5211 case AMDGPU::G_ATOMICRMW_SUB:
5212 case AMDGPU::G_ATOMICRMW_AND:
5213 case AMDGPU::G_ATOMICRMW_OR:
5214 case AMDGPU::G_ATOMICRMW_XOR:
5215 case AMDGPU::G_ATOMICRMW_MAX:
5216 case AMDGPU::G_ATOMICRMW_MIN:
5217 case AMDGPU::G_ATOMICRMW_UMAX:
5218 case AMDGPU::G_ATOMICRMW_UMIN:
5219 case AMDGPU::G_ATOMICRMW_FADD:
5220 case AMDGPU::G_ATOMICRMW_FMIN:
5221 case AMDGPU::G_ATOMICRMW_FMAX:
5222 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5223 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5224 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
5230 case AMDGPU::G_ATOMIC_CMPXCHG: {
5237 case AMDGPU::G_BRCOND: {
5239 AMDGPU::SGPRRegBankID);
5240 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
5241 if (Bank != AMDGPU::SGPRRegBankID)
5242 Bank = AMDGPU::VCCRegBankID;
5244 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
5247 case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
5248 case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
5250 case AMDGPU::G_PREFETCH:
5257 MI.getNumOperands());
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static bool substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)
static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
static Register constrainRegToBank(MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &Reg, const RegisterBank &Bank)
static std::pair< Register, Register > unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode)
static void extendLow32IntoHigh32(MachineIRBuilder &B, Register Hi32Reg, Register Lo32Reg, unsigned ExtOpc, const RegisterBank &RegBank, bool IsBooleanSrc=false)
Implement extending a 32-bit value to a 64-bit value.
static unsigned getExtendOp(unsigned Opc)
static bool isVectorRegisterBank(const RegisterBank &Bank)
static unsigned regBankUnion(unsigned RB0, unsigned RB1)
static std::pair< LLT, LLT > splitUnequalType(LLT Ty, unsigned FirstSize)
Split Ty into 2 pieces.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, unsigned ConstOffset)
Utility function for pushing dynamic vector indexes with a constant offset into waterfall loops.
static LLT widen96To128(LLT Ty)
static LLT getHalfSizedType(LLT Ty)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static bool isReg(const MCInst &MI, unsigned OpNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isUniformMMO(const MachineMemOperand *MMO)
bool applyMappingDynStackAlloc(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
std::pair< Register, unsigned > splitBufferOffsets(MachineIRBuilder &B, Register Offset) const
bool collectWaterfallOperands(SmallSet< Register, 4 > &SGPROperandRegs, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef< unsigned > OpIndices) const
const InstructionMapping & getImageMapping(const MachineRegisterInfo &MRI, const MachineInstr &MI, int RsrcIdx) const
InstructionMappings addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI, const std::array< unsigned, NumOps > RegSrcOpIdx, ArrayRef< OpRegBankEntry< NumOps > > Table) const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const
bool executeInWaterfallLoop(MachineIRBuilder &B, iterator_range< MachineBasicBlock::iterator > Range, SmallSet< Register, 4 > &SGPROperandRegs) const
Legalize instruction MI where operands in OpIndices must be SGPRs.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
AMDGPURegisterBankInfo(const GCNSubtarget &STI)
bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI, unsigned Default=AMDGPU::VGPRRegBankID) const
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const
Handle register layout difference for f16 images for some subtargets.
const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const
void applyMappingImpl(MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const override
See RegisterBankInfo::applyMapping.
bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper, bool Signed) const
bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI, const OperandsMapper &OpdMapper, int RSrcIdx) const
const ValueMapping * getVGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool isScalarLoadLegal(const MachineInstr &MI) const
unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg, Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const
const ValueMapping * getSGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
void split64BitValueForMapping(MachineIRBuilder &B, SmallVector< Register, 2 > &Regs, LLT HalfTy, Register Reg) const
Split 64-bit value Reg into two 32-bit halves and populate them into Regs.
const ValueMapping * getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const
Return the mapping for a pointer argument.
unsigned getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsic(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isDivergentRegBank(const RegisterBank *RB) const override
Returns true if the register bank is considered divergent.
void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const InstructionMapping & getDefaultMappingSOP(const MachineInstr &MI) const
const InstructionMapping & getDefaultMappingAllVGPR(const MachineInstr &MI) const
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
const ValueMapping * getAGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
const GCNSubtarget & Subtarget
const InstructionMapping & getDefaultMappingVOP(const MachineInstr &MI) const
bool isSALUMapping(const MachineInstr &MI) const
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const
bool applyMappingSBufferLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
void applyMappingSMULU64(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
const SIRegisterInfo * TRI
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool hasScalarCompareEq64() const
bool hasScalarSubwordLoads() const
bool hasFullRate64Ops() const
bool hasScalarDwordx3Loads() const
bool hasScalarMulHiInsts() const
bool hasPseudoScalarTrans() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasSALUFloatInsts() const
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void createdInstr(MachineInstr &MI)=0
An instruction has been created and inserted into the function.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT getScalarType() const
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
constexpr LLT divide(int Factor) const
Return a type that is Factor times smaller.
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
TypeSize getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Helper class that represents how the value of an instruction may be mapped and what is the related co...
bool isValid() const
Check whether this object is valid.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
const InstructionMapping & getInstrMapping() const
The final mapping of the instruction.
MachineInstr & getMI() const
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
const unsigned * Sizes
Hold the sizes of the register banks for all HwModes.
bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, TypeSize Size) const
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static IntegerType * getInt32Ty(LLVMContext &C)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
self_iterator getIterator()
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isFlatGlobalAddrSpace(unsigned AS)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelKnownBits *KnownBits=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT whose value fits in int64_t, returns it.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
auto reverse(ContainerTy &&C)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
const RegisterBank * RegBank
Register bank where the partial value lives.
unsigned Length
Length of this mapping in bits.
Helper struct that represents how a value is mapped through different register banks.
unsigned NumBreakDowns
Number of partial mapping to break down this value.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
The llvm::once_flag structure.