#include "llvm/IR/IntrinsicsAMDGPU.h"

#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

#include "AMDGPUGenRegisterBankInfo.def"

using namespace MIPatternMatch;
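// ApplyRegBankMapping is a GISelChangeObserver installed on the
// MachineIRBuilder: while it is live, every instruction it sees created or
// changed has its virtual register operands assigned to NewBank, with s1
// values routed to VCC and boolean extensions rewritten as selects.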
      : B(B), RBI(RBI_), MRI(MRI_), NewBank(RB) {
    assert(!B.isObservingChanges());
    B.setChangeObserver(*this);
  }

  ~ApplyRegBankMapping() override {
    B.stopObservingChanges();
  }
    const unsigned Opc = MI.getOpcode();
    if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
        Opc == AMDGPU::G_SEXT) {
      if (SrcBank == &AMDGPU::VCCRegBank) {
        assert(NewBank == &AMDGPU::VGPRRegBank);

        B.setInsertPt(*MI.getParent(), MI);

        auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
        auto False = B.buildConstant(S32, 0);
        B.buildSelect(DstReg, SrcReg, True, False);
        MRI.setRegBank(True.getReg(0), *NewBank);
        MRI.setRegBank(False.getReg(0), *NewBank);
        MI.eraseFromParent();
      }

      assert(!MRI.getRegClassOrRegBank(DstReg));
      MRI.setRegBank(DstReg, *NewBank);
    if (Opc == AMDGPU::G_TRUNC) {
      assert(DstBank != &AMDGPU::VCCRegBank);

      if (Reg.isPhysical() || MRI.getRegClassOrRegBank(Reg))
        continue;

        assert(NewBank == &AMDGPU::VGPRRegBank &&
               "s1 operands should only be used for vector bools");
        assert((MI.getOpcode() != AMDGPU::G_TRUNC &&
                MI.getOpcode() != AMDGPU::G_ANYEXT) &&
               "not expecting legalization artifacts here");
        RB = &AMDGPU::VCCRegBank;

      MRI.setRegBank(Reg, *RB);
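// AMDGPURegisterBankInfo constructor: caches TRI/TII from the subtarget and,
// once per process, checks that the TableGen-generated register banks line up
// with the AMDGPU::*RegBank objects used throughout this file.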
    : Subtarget(ST), TRI(Subtarget.getRegisterInfo()),
      TII(Subtarget.getInstrInfo()) {

  static auto InitializeRegisterBankOnce = [this]() {
    assert(&getRegBank(AMDGPU::SGPRRegBankID) == &AMDGPU::SGPRRegBank &&
           &getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
           &getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

  unsigned BankID = Bank.getID();
  return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
  return RB != &AMDGPU::SGPRRegBank;

  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      (isVectorRegisterBank(Src) || Src.getID() == AMDGPU::VCCRegBankID)) {
    return std::numeric_limits<unsigned>::max();
  }

  if (Size == 1 &&
      (Dst.getID() == AMDGPU::SGPRRegBankID) &&
      (isVectorRegisterBank(Src) ||
       Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  if (Dst.getID() == AMDGPU::AGPRRegBankID &&
      Src.getID() == AMDGPU::AGPRRegBankID)
    return 4;
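// getRegBankFromRegClass: SReg_1 maps to VCC; SGPR classes map to SGPR (or to
// VCC when the value is an s1 bool); AGPR classes map to AGPR; everything else
// is treated as VGPR.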
  if (&RC == &AMDGPU::SReg_1RegClass)
    return AMDGPU::VCCRegBank;

      return AMDGPU::SGPRRegBank;
    return Ty == LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;

  return TRI->isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
template <unsigned NumOps>
    const std::array<unsigned, NumOps> RegSrcOpIdx,

  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);

  unsigned MappingID = 2;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
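// For a handful of intrinsics the operands can legally live in more than one
// bank; the tables below list the allowed bank combinations together with a
// relative cost, and addMappingFromTable turns each row into an alternative
// instruction mapping.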
  case Intrinsic::amdgcn_readlane: {
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_writelane: {
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }

    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_s_buffer_load: {
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }

    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_s_sendmsg:
  case Intrinsic::amdgcn_s_sendmsghalt: {
      { { AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID }, 3 }

    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, Table);
  }
  if (!MI.hasOneMemOperand())
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_IMPLICIT_DEF: {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::VCCRegBankID }, 1 }

    return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
  }
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_GLOBAL_VALUE: {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 }

    return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),

        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),

        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),

        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());

        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),

        {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),

  case TargetOpcode::G_SELECT: {
        AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
        AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
        AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),

        AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
        AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
        AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),

  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),

        AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
        AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),

  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1), nullptr}),

        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr}),

  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);

  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);

  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)

    MRI.setType(Reg, NewTy);
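// buildReadFirstLane: copy a (possibly wide) VGPR value into SGPRs by
// unmerging it into 32-bit pieces, emitting V_READFIRSTLANE_B32 on each piece,
// and re-merging the results.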
  LLT Ty = MRI.getType(Src);

  if (Bank == &AMDGPU::SGPRRegBank)

  if (Bank != &AMDGPU::VGPRRegBank) {
    Src = B.buildCopy(Ty, Src).getReg(0);
    MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
  }

  unsigned NumParts = Bits / 32;

  auto Unmerge = B.buildUnmerge(S32, Src);
  for (unsigned i = 0; i < NumParts; ++i)

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    MRI.setType(DstPart, NumParts == 1 ? Ty : S32);

    assert(Constrained && "Failed to constrain readfirstlane src reg");

    B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
  }

  Register Dst = B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
  MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
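// executeInWaterfallLoop: legalize an instruction range that needs scalar
// (SGPR) operands but was handed divergent values. The range is moved into a
// loop that, per iteration, readfirstlanes the divergent operands, compares
// them against the original values, ballots the matching lanes, masks exec
// down to them, runs the body, and xors those lanes off until every required
// value combination has executed. Exec is restored afterwards.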
  const unsigned MovExecOpc =
      Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  const unsigned MovExecTermOpc =
      Subtarget.isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;

  const unsigned XorTermOpc = Subtarget.isWave32() ?
    AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
  const unsigned AndSaveExecOpc = Subtarget.isWave32() ?
    AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
  const unsigned ExecReg = Subtarget.isWave32() ?
    AMDGPU::EXEC_LO : AMDGPU::EXEC;

  const int OrigRangeSize = std::distance(Range.begin(), Range.end());

  Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
  Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);

  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(WaveRC);
  Register NewExec = MRI.createVirtualRegister(WaveRC);

  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
      .addReg(InitSaveExecReg)

  auto NewEnd = BodyBB->end();

  assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);

      if (!SGPROperandRegs.count(OldReg))

      auto OldVal = WaterfalledRegMap.find(OldReg);
      if (OldVal != WaterfalledRegMap.end()) {
        Op.setReg(OldVal->second);

      LLT OpTy = MRI.getType(OpReg);

      if (OpBank != &AMDGPU::VGPRRegBank) {
        OpReg = B.buildCopy(OpTy, OpReg).getReg(0);
        MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
      }

      bool Is64 = OpSize % 64 == 0;
      unsigned PartSize = Is64 ? 64 : 32;

      unsigned NumParts = OpSize / PartSize;

        CurrentLaneParts.push_back(CurrentLaneReg);

        auto UnmergeOp = B.buildUnmerge(PartTy, OpReg);
        auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg);
        for (unsigned i = 0; i < NumParts; ++i) {
          CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i));
          MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
          MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
        }

      for (unsigned i = 0; i < NumParts; ++i) {
                                  OpParts[i]).getReg(0);
        MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);

          CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0);
          MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
      }

      Op.setReg(CurrentLaneReg);

      WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));

  CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,

  MRI.setRegClass(CondReg, WaveRC);

  B.buildInstr(AndSaveExecOpc)

  MRI.setSimpleHint(NewExec, CondReg);

  B.setInsertPt(*BodyBB, BodyBB->end());

  B.buildInstr(XorTermOpc)

  B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);

  B.setMBB(*RestoreExecBB);
  B.buildInstr(MovExecTermOpc)
      .addReg(SaveExecReg);

  B.setInsertPt(*RemainderBB, RemainderBB->begin());
  for (unsigned Op : OpIndices) {

    if (OpBank->getID() != AMDGPU::SGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  return !SGPROperandRegs.empty();
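// constrainOpWithReadfirstlane: if the given operand is not already uniform,
// replace it with the result of a readfirstlane so the instruction can take
// an SGPR operand.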
  Register Reg = MI.getOperand(OpIdx).getReg();

  if (Bank == &AMDGPU::SGPRRegBank)

  MI.getOperand(OpIdx).setReg(Reg);
  assert(FirstSize % EltSize == 0);

  unsigned FirstPartNumElts = FirstSize / EltSize;
  unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
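// applyMappingLoad: fix up loads after bank assignment. Scalar (SGPR) loads
// narrower than 32 bits are widened and the result truncated or extended;
// loads wider than the selected unit can handle are split into legal pieces
// via the legalizer helpers, and anything that cannot stay scalar is rewritten
// for the VALU.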
  const LLT LoadTy = MRI.getType(DstReg);

  const unsigned MaxNonSmrdLoadSize = 128;

  if (DstBank == &AMDGPU::SGPRRegBank) {
    if (LoadSize == 32 &&
    if (LoadSize == 32 &&

    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);

    if (LoadSize == 32) {

      if (MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
        auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
        B.buildSExtInReg(MI.getOperand(0), WideLoad, MemSize);
      } else if (MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
        auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
        B.buildZExtInReg(MI.getOperand(0), WideLoad, MemSize);
      } else
        B.buildLoadFromOffset(MI.getOperand(0), PtrReg, *MMO, 0);

      auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
        B.buildTrunc(MI.getOperand(0), WideLoad);
        B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),

    MI.eraseFromParent();

  if (LoadSize <= MaxNonSmrdLoadSize)

  if (SrcRegs.empty())

  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);

  assert(LoadSize % MaxNonSmrdLoadSize == 0);
  unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
  const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
  ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);

  MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
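// applyMappingDynStackAlloc: dynamic allocas operate on the wave-level stack
// pointer, so a divergent size is first made uniform with wave_reduce_umax,
// then scaled by the wavefront size and added to a copy of SP, with extra
// masking when the requested alignment exceeds the default stack alignment.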
  const auto &TFI = *ST.getFrameLowering();

         "Stack grows upwards for AMDGPU");

  Register AllocSize = MI.getOperand(1).getReg();

  if (SizeBank != &AMDGPU::SGPRRegBank) {
    auto WaveReduction =
        B.buildIntrinsic(Intrinsic::amdgcn_wave_reduce_umax, {LLT::scalar(32)})

    AllocSize = WaveReduction.getReg(0);
  }

  LLT PtrTy = MRI.getType(Dst);

  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

  auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
  auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);

  auto OldSP = B.buildCopy(PtrTy, SPReg);
  if (Alignment > TFI.getStackAlign()) {
    auto StackAlignMask = (Alignment.value() << ST.getWavefrontSizeLog2()) - 1;
    auto Tmp1 = B.buildPtrAdd(PtrTy, OldSP,
    B.buildMaskLowPtrBits(Dst, Tmp1,
                          Log2(Alignment) + ST.getWavefrontSizeLog2());
  } else {
    B.buildCopy(Dst, OldSP);
  }

  auto PtrAdd = B.buildPtrAdd(PtrTy, Dst, ScaledSize);
  B.buildCopy(SPReg, PtrAdd);
  MI.eraseFromParent();
                                              int RsrcIdx) const {
  const int NumDefs = MI.getNumExplicitDefs();

  RsrcIdx += NumDefs + 1;

  for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())

    if (I == RsrcIdx || I == RsrcIdx + 1)
    Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const {

  if (std::optional<int64_t> Imm =

      VOffsetReg = B.buildConstant(S32, 0).getReg(0);
      SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
      InstOffsetVal = ImmOffset;

      B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
      B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
      return SOffset + ImmOffset;

    SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
    B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
    InstOffsetVal = ImmOffset;

    VOffsetReg = B.buildConstant(S32, 0).getReg(0);
    B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);

    InstOffsetVal = ImmOffset;

  if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
  if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {

    VOffsetReg = CombinedOffset;

    VOffsetReg = B.buildCopy(S32, CombinedOffset).getReg(0);
    B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);

  SOffsetReg = B.buildConstant(S32, 0).getReg(0);
  B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
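// applyMappingSBufferLoad: an s_buffer_load whose operands are not all uniform
// is rewritten as one or more 128-bit buffer loads (waterfalling the resource
// when it is divergent) and the pieces are merged back into the original
// destination.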
  LLT Ty = MRI.getType(Dst);

  if (RSrcBank == &AMDGPU::SGPRRegBank &&
      OffsetBank == &AMDGPU::SGPRRegBank)

  if (LoadSize == 256 || LoadSize == 512) {
    NumLoads = LoadSize / 128;
    Ty = Ty.divide(NumLoads);
  }

  const Align Alignment = NumLoads > 1 ? Align(16 * NumLoads) : Align(1);

  int64_t ImmOffset = 0;

                                      SOffset, ImmOffset, Alignment);

  const Align MemAlign(4);

    B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);

  for (int i = 0; i < NumLoads; ++i) {
    if (NumLoads == 1) {

      LoadParts[i] = MRI.createGenericVirtualRegister(Ty);
      MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);

        .addDef(LoadParts[i])
        .addImm(ImmOffset + 16 * i)
        .addMemOperand(MMO);
  }

  if (RSrcBank != &AMDGPU::SGPRRegBank) {
    B.setInstr(*Span.begin());
    MI.eraseFromParent();

    OpsToWaterfall.insert(RSrc);
  }

  if (NumLoads != 1) {
      B.buildConcatVectors(Dst, LoadParts);
      B.buildMergeLikeInstr(Dst, LoadParts);
  }

  if (RSrcBank == &AMDGPU::SGPRRegBank)
    MI.eraseFromParent();
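// applyMappingBFE: lower signed/unsigned bitfield extracts. On the VALU,
// 64-bit extracts are expanded with shifts plus 32-bit sbfx/ubfx; on the SALU
// the offset and width are packed into the single src1 operand expected by
// S_BFE_{I|U}{32|64}.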
  LLT Ty = MRI.getType(DstReg);

  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
  Register SrcReg = MI.getOperand(FirstOpnd).getReg();
  Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();
  Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();

  if (DstBank == &AMDGPU::VGPRRegBank) {

    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

    auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)
                              : B.buildLShr(S64, SrcReg, OffsetReg);
    auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);

      auto Zero = B.buildConstant(S32, 0);
      auto WidthImm = ConstWidth->Value.getZExtValue();
      if (WidthImm <= 32) {

            Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
                   : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);

            Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;
        B.buildMergeLikeInstr(DstReg, {Extract, Extend});
      } else {

        auto UpperWidth = B.buildConstant(S32, WidthImm - 32);

                ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
                : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
        B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
      }

      MI.eraseFromParent();

    auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);
    auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);

      B.buildAShr(S64, SignBit, ExtShift);
      B.buildLShr(S64, SignBit, ExtShift);
    MI.eraseFromParent();
  }

  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

  auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
  auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);

  auto ShiftWidth = B.buildShl(S32, WidthReg, B.buildConstant(S32, 16));

  auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);

  unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
                             (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);

  auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});

  MI.eraseFromParent();
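// applyMappingMAD_64_32: expand G_AMDGPU_MAD_U64_U32 / MAD_I64_I32 when the
// multiply can be done on the SALU. The 64-bit product is built from 32-bit
// mul / mulh (falling back to the VALU when the high half must be divergent)
// and the 64-bit addend is added back with 32-bit uaddo/uadde, tracking the
// carry in an SGPR or in VCC depending on where the result lives.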
  if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)

  bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

  bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
  bool Accumulate = true;

  Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
  bool MulHiInVgpr = false;

  MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);

    DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0)
                       : B.buildSMulH(S32, Src0, Src1).getReg(0);
    MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);

    MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
    MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);

    DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
                       : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
    MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);

  LLT CarryType = DstOnValu ? S1 : S32;
      DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
      DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;

    Zero = B.buildConstant(S32, 0).getReg(0);
    MRI.setRegBank(Zero,
                   MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);

    MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
                                      : AMDGPU::SGPRRegBank);

    if (DstOnValu && !MulHiInVgpr) {
      Carry = B.buildTrunc(S1, Carry).getReg(0);
      MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
    }

      DstLo = B.buildCopy(S32, DstLo).getReg(0);
      DstHi = B.buildCopy(S32, DstHi).getReg(0);
      MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
      MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);

    auto Unmerge = B.buildUnmerge(S32, Src2);
    Register Src2Lo = Unmerge.getReg(0);
    Register Src2Hi = Unmerge.getReg(1);
    MRI.setRegBank(Src2Lo, DstBank);
    MRI.setRegBank(Src2Hi, DstBank);

      MRI.setRegBank(Src2Sign.getReg(0), CarryBank);

      Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
      MRI.setRegBank(Carry, CarryBank);

    auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
    DstLo = AddLo.getReg(0);
    Register CarryLo = AddLo.getReg(1);
    MRI.setRegBank(DstLo, DstBank);
    MRI.setRegBank(CarryLo, CarryBank);

    auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
    DstHi = AddHi.getReg(0);
    MRI.setRegBank(DstHi, DstBank);

    Register CarryHi = AddHi.getReg(1);
    MRI.setRegBank(CarryHi, CarryBank);

      Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0);
      MRI.setRegBank(Carry, CarryBank);

    Carry = B.buildConstant(CarryType, 0).getReg(0);
    MRI.setRegBank(Carry, CarryBank);

  B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});

    B.buildCopy(Dst1, Carry);
    B.buildTrunc(Dst1, Carry);

  MI.eraseFromParent();
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    return TargetOpcode::G_SEXT;
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    return TargetOpcode::G_ZEXT;

    return TargetOpcode::G_ANYEXT;
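// unpackV2S16ToS32: bitcast a <2 x s16> to s32 and return the low and high
// halves as two s32 values, extended according to ExtOpcode (sext, zext, or
// anyext).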
static std::pair<Register, Register>

  auto Bitcast = B.buildBitcast(S32, Src);

  if (ExtOpcode == TargetOpcode::G_SEXT) {
    auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16);
    auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16));
    return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
  }

  auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16));
  if (ExtOpcode == TargetOpcode::G_ZEXT) {
    auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff));
    return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
  }

  assert(ExtOpcode == TargetOpcode::G_ANYEXT);
  return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
  if (!SrcReg.empty()) {

  LLT StoreVT = MRI.getType(Reg);

  auto Unmerge = B.buildUnmerge(S16, Reg);

  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
static std::pair<Register, unsigned>

    return std::pair(Register(), Const);

    return std::pair(Base, Const);

  return std::pair(Reg, 0);

std::pair<Register, unsigned>

  if (ImmOffset != 0) {

    unsigned Overflow = ImmOffset & ~MaxImm;
    ImmOffset -= Overflow;
    if ((int32_t)Overflow < 0) {
      Overflow += ImmOffset;

    if (Overflow != 0) {

        BaseReg = B.buildConstant(S32, Overflow).getReg(0);

        auto OverflowVal = B.buildConstant(S32, Overflow);
        BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);

    BaseReg = B.buildConstant(S32, 0).getReg(0);

  return {BaseReg, C1};
  LLT SrcTy = MRI.getType(SrcReg);

    B.buildInstr(AMDGPU::V_MOV_B32_e32)

  Register TmpReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register TmpReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  B.buildInstr(AMDGPU::V_MOV_B32_e32)
    .addUse(SrcReg, 0, AMDGPU::sub0);
  B.buildInstr(AMDGPU::V_MOV_B32_e32)
    .addUse(SrcReg, 0, AMDGPU::sub1);
  B.buildInstr(AMDGPU::REG_SEQUENCE)
    .addImm(AMDGPU::sub0)
    .addImm(AMDGPU::sub1);
                                    unsigned ConstOffset) {

  auto MaterializedOffset = B.buildConstant(S32, ConstOffset);

  auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
  MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
  MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank);
                                 bool IsBooleanSrc = false) {
  if (ExtOpc == AMDGPU::G_ZEXT) {
    B.buildConstant(Hi32Reg, 0);
  } else if (ExtOpc == AMDGPU::G_SEXT) {

      B.buildCopy(Hi32Reg, Lo32Reg);

      B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
      B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
  } else {
    assert(ExtOpc == AMDGPU::G_ANYEXT && "not an integer extension");
    B.buildUndef(Hi32Reg);
  }
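// foldExtractEltToCmpSelect / foldInsertEltToCmpSelect: for small vectors with
// a divergent index, an extract or insert can be cheaper as a chain of
// compare+select over all elements than as a waterfall loop; these helpers
// perform that expansion and assign banks to every temporary they create.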
bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
    const OperandsMapper &OpdMapper) const {

      *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

  bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

  LLT VecTy = MRI.getType(VecReg);

      *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
      *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;

      (DstBank == AMDGPU::SGPRRegBank &&
       SrcBank == AMDGPU::SGPRRegBank &&
       IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
                                       : AMDGPU::VCCRegBank;

  if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
    Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
    MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
  }

  unsigned NumLanes = DstRegs.size();

    EltTy = MRI.getType(DstRegs[0]);

  auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);

  for (unsigned L = 0; L < NumLanes; ++L)
    Res[L] = UnmergeToEltTy.getReg(L);

  for (unsigned I = 1; I < NumElem; ++I) {
    auto IC = B.buildConstant(S32, I);
    MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);

    MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

    for (unsigned L = 0; L < NumLanes; ++L) {
      auto S = B.buildSelect(EltTy, Cmp,
                             UnmergeToEltTy.getReg(I * NumLanes + L), Res[L]);

      for (unsigned N : { 0, 2, 3 })
        MRI.setRegBank(S->getOperand(N).getReg(), DstBank);

      Res[L] = S->getOperand(0).getReg();
    }
  }

  for (unsigned L = 0; L < NumLanes; ++L) {
    Register DstReg = (NumLanes == 1) ? MI.getOperand(0).getReg() : DstRegs[L];
    B.buildCopy(DstReg, Res[L]);
    MRI.setRegBank(DstReg, DstBank);
  }

  MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
  MI.eraseFromParent();
  if (CurrBank && *CurrBank != Bank) {
    Register Copy = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
    MRI.setRegBank(Copy, Bank);
  }

  MRI.setRegBank(Reg, Bank);
bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
    const OperandsMapper &OpdMapper) const {

      *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;

  bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

  LLT VecTy = MRI.getType(VecReg);

      *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
      *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
      *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

      (DstBank == AMDGPU::SGPRRegBank &&
       SrcBank == AMDGPU::SGPRRegBank &&
       InsBank == AMDGPU::SGPRRegBank &&
       IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
                                       : AMDGPU::VCCRegBank;

  if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
    Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
    MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
  }

  unsigned NumLanes = InsRegs.size();

    InsRegs.push_back(MI.getOperand(2).getReg());

    EltTy = MRI.getType(InsRegs[0]);

  auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);

  for (unsigned I = 0; I < NumElem; ++I) {
    auto IC = B.buildConstant(S32, I);
    MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);

    MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

    for (unsigned L = 0; L < NumLanes; ++L) {

      Register Op1 = UnmergeToEltTy.getReg(I * NumLanes + L);

  if (MergeTy == MRI.getType(MI.getOperand(0).getReg())) {
    B.buildBuildVector(MI.getOperand(0), Ops);
  } else {
    auto Vec = B.buildBuildVector(MergeTy, Ops);
    MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
    B.buildBitcast(MI.getOperand(0).getReg(), Vec);
  }

  MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
  MI.eraseFromParent();
  if (DefRegs.empty()) {

         (Src0Regs.empty() || Src0Regs.size() == 2));

  if (Src0Regs.empty())

  if (Src1Regs.empty())

  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

  Register Hi = B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
  Register MulLoHi = B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);

  Register MulHiLo = B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
  B.buildAdd(DefRegs[1], Add, MulHiLo);
  B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);

  MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
  MI.eraseFromParent();
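// applyMappingImpl: the main per-opcode fixup that runs after a mapping has
// been chosen. It rewrites instructions whose operands ended up in the "wrong"
// bank: boolean results are moved between scalar and VCC form, wide scalar ops
// are split into 32-bit halves, divergent operands of scalar-only instructions
// are waterfalled, and so on. Each case below handles one opcode family.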
  B.setInstrAndDebugLoc(MI);

  unsigned Opc = MI.getOpcode();

  case AMDGPU::G_CONSTANT:
  case AMDGPU::G_IMPLICIT_DEF: {

    LLT DstTy = MRI.getType(DstReg);

    if (DstBank == &AMDGPU::VCCRegBank)

    if (DefRegs.empty())

    B.setInsertPt(*MI.getParent(), ++MI.getIterator());

    LLVMContext &Ctx = B.getMF().getFunction().getContext();

    MI.getOperand(0).setReg(NewDstReg);
    if (Opc != AMDGPU::G_IMPLICIT_DEF) {
      uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
      MI.getOperand(1).setCImm(

    MRI.setRegBank(NewDstReg, *DstBank);
    B.buildTrunc(DefRegs[0], NewDstReg);

  case AMDGPU::G_PHI: {

    LLT DstTy = MRI.getType(DstReg);

    if (DstBank == &AMDGPU::VCCRegBank) {

      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {

        if (SrcBank != &AMDGPU::VCCRegBank) {

          MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
          MI.getOperand(I).setReg(Copy.getReg(0));

    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
    B.setInsertPt(B.getMBB(), MI);

  case AMDGPU::G_FCMP:

  case AMDGPU::G_ICMP:
  case AMDGPU::G_UADDO:
  case AMDGPU::G_USUBO:
  case AMDGPU::G_UADDE:
  case AMDGPU::G_SADDE:
  case AMDGPU::G_USUBE:
  case AMDGPU::G_SSUBE: {
    unsigned BoolDstOp =
        (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
    Register DstReg = MI.getOperand(BoolDstOp).getReg();

    if (DstBank != &AMDGPU::SGPRRegBank)

    const bool HasCarryIn = MI.getNumOperands() == 5;

    MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
    MI.getOperand(BoolDstOp).setReg(NewDstReg);

      MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
      B.buildZExt(NewSrcReg, MI.getOperand(4).getReg());
      MI.getOperand(4).setReg(NewSrcReg);

    B.setInsertPt(*MBB, std::next(MI.getIterator()));

    if (DefRegs.empty())

    B.buildTrunc(DefRegs[0], NewDstReg);

  case AMDGPU::G_SELECT: {

    LLT DstTy = MRI.getType(DstReg);

    if (CondRegs.empty())

    if (CondBank == &AMDGPU::SGPRRegBank) {

      MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

      MI.getOperand(1).setReg(NewCondReg);
      B.buildZExt(NewCondReg, CondRegs[0]);

    if (DefRegs.empty()) {

    if (Src1Regs.empty())

    if (Src2Regs.empty())

    auto Flags = MI.getFlags();
    B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0], Flags);
    B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1], Flags);

    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();

  case AMDGPU::G_BRCOND: {
    Register CondReg = MI.getOperand(0).getReg();

    if (CondBank == &AMDGPU::SGPRRegBank) {

      MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

      MI.getOperand(0).setReg(NewCondReg);
      B.buildZExt(NewCondReg, CondReg);
  case AMDGPU::G_XOR: {

    LLT DstTy = MRI.getType(DstReg);

    if (DstBank == &AMDGPU::VCCRegBank)

      ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);

    if (DefRegs.empty()) {

           (Src0Regs.empty() || Src0Regs.size() == 2));

    if (Src0Regs.empty())

    if (Src1Regs.empty())

    auto Flags = MI.getFlags();
    B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]}, Flags);
    B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]}, Flags);

    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();

  case AMDGPU::G_ABS: {

    if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {

      ApplyRegBankMapping Apply(B, *this, MRI, &AMDGPU::VGPRRegBank);

  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR:
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {

    LLT DstTy = MRI.getType(DstReg);

    if (DstBank == &AMDGPU::VGPRRegBank)

    ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);

    if (DstTy.isVector() && Opc == AMDGPU::G_ABS) {

      std::tie(WideSrcLo, WideSrcHi) =

      auto Lo = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcLo});
      auto Hi = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcHi});
      B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
      MI.eraseFromParent();
    }

      std::tie(WideSrc0Lo, WideSrc0Hi)
      std::tie(WideSrc1Lo, WideSrc1Hi)
      auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
      auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
      B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
      MI.eraseFromParent();

    if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
        Opc == AMDGPU::G_ASHR) {
      B.setInsertPt(*MBB, MI.getIterator());
  case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
  case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {

    Register SrcReg0 = MI.getOperand(1).getReg();
    Register SrcReg1 = MI.getOperand(2).getReg();

    assert(MRI.getType(DstReg) == S64 && "This is a special case for s_mul_u64 "
                                         "that handles only 64-bit operands.");

    if (DstBank == &AMDGPU::SGPRRegBank) {
      MI.setDesc(TII->get(AMDGPU::S_MUL_U64));
      MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
      MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
      MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
    }

    assert(MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
           "The destination operand should be in vector registers.");

    Register Op0L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
    B.buildTrunc(Op0L, SrcReg0);

    Register Op1L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
    B.buildTrunc(Op1L, SrcReg1);

    unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
                          ? AMDGPU::G_AMDGPU_MAD_U64_U32
                          : AMDGPU::G_AMDGPU_MAD_I64_I32;

    MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
    Register CarryOut = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
    B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
    MI.eraseFromParent();
  case AMDGPU::G_SEXT_INREG: {

    if (SrcRegs.empty())

    ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);

    int Amt = MI.getOperand(2).getImm();

        B.buildFreeze(DstRegs[0], SrcRegs[0]);

        auto Freeze = B.buildFreeze(S32, SrcRegs[0]);

        B.buildSExtInReg(DstRegs[0], Freeze, Amt);

      B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));

      B.buildCopy(DstRegs[0], SrcRegs[0]);
      B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);

    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();
  case AMDGPU::G_CTPOP:
  case AMDGPU::G_BITREVERSE: {

    if (DstBank == &AMDGPU::SGPRRegBank)

    LLT Ty = MRI.getType(SrcReg);

    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);

  case AMDGPU::G_AMDGPU_FFBH_U32:
  case AMDGPU::G_AMDGPU_FFBL_B32:
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
  case AMDGPU::G_CTTZ_ZERO_UNDEF: {

    if (DstBank == &AMDGPU::SGPRRegBank)

    LLT Ty = MRI.getType(SrcReg);

    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);

    unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
                          ? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32
                          : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
                                ? (unsigned)AMDGPU::G_AMDGPU_FFBL_B32

    unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
    auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]});
    auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]});

        Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
            : AMDGPU::G_UADDSAT;
    Y = B.buildInstr(AddOpc, {S32}, {Y, B.buildConstant(S32, 32)});

    B.buildUMin(DstReg, X, Y);
    MI.eraseFromParent();
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ZEXT:
  case AMDGPU::G_ANYEXT: {

    LLT SrcTy = MRI.getType(SrcReg);
    const bool Signed = Opc == AMDGPU::G_SEXT;

    LLT DstTy = MRI.getType(DstReg);

        SrcBank != &AMDGPU::SGPRRegBank &&
        SrcBank != &AMDGPU::VCCRegBank &&

        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
      } else if (Opc == AMDGPU::G_ZEXT) {
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);

        B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);

      MRI.setRegBank(DstReg, *SrcBank);
      MI.eraseFromParent();

    if (SrcBank == &AMDGPU::VCCRegBank) {

      const bool UseSel64 = DstSize > 32 &&
                            SrcBank->getID() == AMDGPU::SGPRRegBankID;

      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

        B.buildSelect(DefRegs[0], SrcReg, True, False);

      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);

        B.buildSelect(DstReg, SrcReg, True, False);

      MI.eraseFromParent();
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {

    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    if (foldExtractEltToCmpSelect(B, MI, OpdMapper))

    unsigned ConstOffset;
    std::tie(BaseIdxReg, ConstOffset) =

    bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&

    if (ShouldMoveIndexIntoLoop)
      MI.getOperand(2).setReg(BaseIdxReg);

    const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
                                SrcBank == &AMDGPU::SGPRRegBank;
    if (DstRegs.empty()) {

      if (NeedCopyToVGPR) {
        Register TmpReg = MRI.createGenericVirtualRegister(DstTy);
        MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
        MI.getOperand(0).setReg(TmpReg);
        B.setInsertPt(*MI.getParent(), ++MI.getIterator());
      }

      if (ShouldMoveIndexIntoLoop)

    auto CastSrc = B.buildBitcast(Vec32, SrcReg);
    auto One = B.buildConstant(S32, 1);

    auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);

    auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
    auto Extract1 = B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);

    MRI.setRegBank(DstReg, *DstBank);
    MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
    MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);

      MI.eraseFromParent();

      B.setInstr(*Span.begin());
      MI.eraseFromParent();

    if (NeedCopyToVGPR) {

      MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
      MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);

      Extract0->getOperand(0).setReg(TmpReg0);
      Extract1->getOperand(0).setReg(TmpReg1);

    if (ShouldMoveIndexIntoLoop)
  case AMDGPU::G_INSERT_VECTOR_ELT: {

    LLT VecTy = MRI.getType(DstReg);

    MRI.setType(MI.getOperand(1).getReg(), VecTy);

    if (foldInsertEltToCmpSelect(B, MI, OpdMapper))

    LLT InsTy = MRI.getType(InsReg);

    unsigned ConstOffset;
    std::tie(BaseIdxReg, ConstOffset) =

    bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&

    if (ShouldMoveIndexIntoLoop)
      MI.getOperand(3).setReg(BaseIdxReg);

    if (InsRegs.empty()) {

      if (ShouldMoveIndexIntoLoop) {

    auto CastSrc = B.buildBitcast(Vec32, SrcReg);
    auto One = B.buildConstant(S32, 1);

    auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);

    auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
    auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);

    MRI.setRegBank(InsReg, *InsSrcBank);
    MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
    MRI.setRegBank(InsLo.getReg(0), *DstBank);
    MRI.setRegBank(InsHi.getReg(0), *DstBank);
    MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);

      B.setInsertPt(B.getMBB(), MI);
      B.buildBitcast(DstReg, InsHi);
      MI.eraseFromParent();

      B.setInstr(*Span.begin());
      MI.eraseFromParent();

      B.buildBitcast(DstReg, InsHi);

    if (ShouldMoveIndexIntoLoop)
  case AMDGPU::G_AMDGPU_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_BUFFER_STORE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {

  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {

  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {

  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {

  case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:

  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT:

    case Intrinsic::amdgcn_readlane: {

    case Intrinsic::amdgcn_writelane: {

    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_lds_param_load: {

    case Intrinsic::amdgcn_interp_inreg_p10:
    case Intrinsic::amdgcn_interp_inreg_p2:
    case Intrinsic::amdgcn_interp_inreg_p10_f16:
    case Intrinsic::amdgcn_interp_inreg_p2_f16:
    case Intrinsic::amdgcn_interp_p10_rtz_f16:
    case Intrinsic::amdgcn_interp_p2_rtz_f16:
    case Intrinsic::amdgcn_permlane16_swap:
    case Intrinsic::amdgcn_permlane32_swap:

    case Intrinsic::amdgcn_permlane16:
    case Intrinsic::amdgcn_permlanex16: {

    case Intrinsic::amdgcn_sbfe:

    case Intrinsic::amdgcn_ubfe:

    case Intrinsic::amdgcn_inverse_ballot:
    case Intrinsic::amdgcn_s_bitreplicate:
    case Intrinsic::amdgcn_s_quadmask:
    case Intrinsic::amdgcn_s_wqm:

    case Intrinsic::amdgcn_ballot:

  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {

  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
    unsigned N = MI.getNumExplicitOperands() - 2;

  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
    auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();

    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {

    case Intrinsic::amdgcn_ds_gws_init:
    case Intrinsic::amdgcn_ds_gws_barrier:
    case Intrinsic::amdgcn_ds_gws_sema_br: {

    case Intrinsic::amdgcn_ds_gws_sema_v:
    case Intrinsic::amdgcn_ds_gws_sema_p:
    case Intrinsic::amdgcn_ds_gws_sema_release_all: {

    case Intrinsic::amdgcn_ds_append:
    case Intrinsic::amdgcn_ds_consume: {

    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {

    case Intrinsic::amdgcn_s_setreg: {

    case Intrinsic::amdgcn_s_ttracedata:

    case Intrinsic::amdgcn_raw_buffer_load_lds:
    case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {

    case Intrinsic::amdgcn_struct_buffer_load_lds:
    case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {

    case Intrinsic::amdgcn_global_load_lds: {

    case Intrinsic::amdgcn_lds_direct_load: {

    case Intrinsic::amdgcn_exp_row:

    case Intrinsic::amdgcn_s_sleep_var:

    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_wakeup_barrier:

    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_signal_var:

    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_get_named_barrier_state: {

    case Intrinsic::amdgcn_s_prefetch_data: {
      unsigned AS = MRI.getType(PtrReg).getAddressSpace();

      MI.eraseFromParent();

    if (RSrcIntrin->IsImage) {
  case AMDGPU::G_SI_CALL: {

    unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
    unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;

    unsigned NonCopyInstrsLen = 0;

    while (Start->getOpcode() != FrameSetupOpcode) {

      bool IsCopy = false;
      if (Start->getOpcode() == AMDGPU::COPY) {
        auto &Dst = Start->getOperand(0);

        if (Reg.isPhysical() && MI.readsRegister(Reg, TRI)) {

        auto &Src = Start->getOperand(1);

          IsCopy = Info->getScratchRSrcReg() == Reg;

        NonCopyInstrsLen = NonCopyInstrs.size();

    NonCopyInstrs.resize(NonCopyInstrsLen);

    for (auto *NonCopy : reverse(NonCopyInstrs)) {

    NonCopyInstrs.clear();
    NonCopyInstrsLen = 0;

    while (End->getOpcode() != FrameDestroyOpcode) {

      bool IsCopy = false;
      if (End->getOpcode() == AMDGPU::COPY) {
        auto &Src = End->getOperand(1);

        IsCopy = Reg.isPhysical() && MI.modifiesRegister(Reg, TRI);

        NonCopyInstrsLen = NonCopyInstrs.size();

    NonCopyInstrs.resize(NonCopyInstrsLen);

    for (auto *NonCopy : reverse(NonCopyInstrs)) {

    B.setInsertPt(B.getMBB(), Start);
  case AMDGPU::G_LOAD:
  case AMDGPU::G_ZEXTLOAD:
  case AMDGPU::G_SEXTLOAD: {

  case AMDGPU::G_DYN_STACKALLOC:

  case AMDGPU::G_STACKRESTORE: {

  case AMDGPU::G_SBFX:

  case AMDGPU::G_UBFX:

  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:

  case AMDGPU::G_PREFETCH: {

      MI.eraseFromParent();

    if (PtrBank == AMDGPU::VGPRRegBankID) {
      MI.eraseFromParent();

    unsigned AS = MRI.getType(PtrReg).getAddressSpace();

      MI.eraseFromParent();
  if (RB0 == AMDGPU::InvalidRegBankID)
    return RB1;
  if (RB1 == AMDGPU::InvalidRegBankID)
    return RB0;

  if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
    return AMDGPU::SGPRRegBankID;

  if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
    return AMDGPU::AGPRRegBankID;

  return AMDGPU::VGPRRegBankID;

  if (RB0 == AMDGPU::InvalidRegBankID)
    return RB1;
  if (RB1 == AMDGPU::InvalidRegBankID)
    return RB0;

  if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
    return AMDGPU::VCCRegBankID;
  unsigned RegBank = AMDGPU::InvalidRegBankID;

    if (RegBank == AMDGPU::VGPRRegBankID)

    if (Bank->getID() != AMDGPU::SGPRRegBankID)
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

    OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
  }

                               MI.getNumOperands());

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

    unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
  }

                               MI.getNumOperands());

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {

    OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }

                               MI.getNumOperands());
                                        int RsrcIdx) const {

  RsrcIdx += MI.getNumExplicitDefs() + 1;

  const int NumOps = MI.getNumOperands();

  for (int I = 0; I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())

    const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;

      OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);

      OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  LLT PtrTy = MRI.getType(PtrReg);

    return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

  return AMDGPU::getValueMapping(PtrBank->getID(), Size);

  LLT PtrTy = MRI.getType(PtrReg);

    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);

    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

        AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
    PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);

    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);

  OpdsMapping[0] = ValMapping;
  OpdsMapping[1] = PtrMapping;
  return AMDGPU::getValueMapping(Bank, Size);

  return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

  return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size);
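// getInstrMapping: compute the default operand-to-bank mapping for an
// instruction. Copies, REG_SEQUENCE and PHIs are handled up front; everything
// else falls into the big switch below, which generally prefers SGPRs when all
// inputs are uniform and falls back to VGPRs (or VCC for booleans) otherwise.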
  if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) {

    assert(SrcBank && "src bank should have been assigned already");

      DstBank = &AMDGPU::VCCRegBank;
      DstBank = &AMDGPU::VCCRegBank;

    if (MI.getOpcode() != AMDGPU::G_FREEZE &&

    unsigned OpdsMappingSize = MI.isCopy() ? 1 : 2;

    OpdsMapping[0] = &ValMap;
    if (MI.getOpcode() == AMDGPU::G_FREEZE)
      OpdsMapping[1] = &ValMap;
  }

  if (MI.isRegSequence()) {

    unsigned BankID = AMDGPU::SGPRRegBankID;

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {

      if (OpBank != AMDGPU::SGPRRegBankID) {
        BankID = AMDGPU::VGPRRegBankID;

  if (auto *PHI = dyn_cast<GPhi>(&MI)) {
    unsigned ResultBank = AMDGPU::InvalidRegBankID;

      ResultBank = DstBank->getID();

    for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I) {

      if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
        ResultBank = AMDGPU::VGPRRegBankID;

      unsigned OpBank = Bank->getID();

    assert(ResultBank != AMDGPU::InvalidRegBankID);

    unsigned Size = MRI.getType(DstReg).getSizeInBits();
  switch (MI.getOpcode()) {

  case AMDGPU::G_MUL: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

      unsigned TargetBankID = AMDGPU::InvalidRegBankID;
      unsigned BankLHS = AMDGPU::InvalidRegBankID;
      unsigned BankRHS = AMDGPU::InvalidRegBankID;

        TargetBankID = DstBank->getID();
        if (DstBank == &AMDGPU::VCCRegBank) {
          TargetBankID = AMDGPU::VCCRegBankID;
          BankLHS = AMDGPU::VCCRegBankID;
          BankRHS = AMDGPU::VCCRegBankID;

                                 AMDGPU::SGPRRegBankID);
                                 AMDGPU::SGPRRegBankID);

                                 AMDGPU::VCCRegBankID);
                                 AMDGPU::VCCRegBankID);

        if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
          TargetBankID = AMDGPU::VGPRRegBankID;
        } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
          TargetBankID = AMDGPU::VCCRegBankID;
          BankLHS = AMDGPU::VCCRegBankID;
          BankRHS = AMDGPU::VCCRegBankID;
        } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
          TargetBankID = AMDGPU::SGPRRegBankID;
        }

      OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);

      OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
      OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];

      OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);

      OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);

      OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
  case AMDGPU::G_PTR_ADD:
  case AMDGPU::G_PTRMASK:
  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR:
  case AMDGPU::G_UADDO:
  case AMDGPU::G_USUBO:
  case AMDGPU::G_UADDE:
  case AMDGPU::G_SADDE:
  case AMDGPU::G_USUBE:
  case AMDGPU::G_SSUBE:
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX:
  case AMDGPU::G_SHUFFLE_VECTOR:
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
  case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
  case AMDGPU::G_AMDGPU_S_MUL_U64_U32:

  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FFLOOR:
  case AMDGPU::G_FCEIL:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_STRICT_FADD:
  case AMDGPU::G_STRICT_FSUB:
  case AMDGPU::G_STRICT_FMUL:
  case AMDGPU::G_STRICT_FMA: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());

  case AMDGPU::G_FPTOSI:
  case AMDGPU::G_FPTOUI:
  case AMDGPU::G_SITOFP:
  case AMDGPU::G_UITOFP: {
    unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FPEXT: {
    unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

  case AMDGPU::G_FSQRT:
  case AMDGPU::G_FEXP2:
  case AMDGPU::G_FLOG2: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

  case AMDGPU::G_SADDSAT:
  case AMDGPU::G_SSUBSAT:
  case AMDGPU::G_UADDSAT:
  case AMDGPU::G_USUBSAT:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FLDEXP:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_STRICT_FLDEXP:
  case AMDGPU::G_BSWAP:
  case AMDGPU::G_FSHR:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
  case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
  case AMDGPU::G_AMDGPU_SMED3:
  case AMDGPU::G_AMDGPU_FMED3:

  case AMDGPU::G_UMULH:
  case AMDGPU::G_SMULH: {
4061 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4062 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4071 bool AllSalu =
true;
4072 bool MulSalu =
true;
4073 for (
unsigned i = 0; i < 5; ++i) {
4076 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4078 if (i == 2 || i == 3) {
4096 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4097 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4098 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4099 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4100 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
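// One mapping outcome for G_AMDGPU_MAD_U64_U32 / G_AMDGPU_MAD_I64_I32: the
// 64-bit result and the 64-bit addend live in VGPRs, the carry-out is a
// 1-bit VCC value, and the two 32-bit multiplicands stay in SGPRs (the case
// where the multiply can remain scalar even though the whole MAD cannot).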
4103 case AMDGPU::G_IMPLICIT_DEF: {
4104 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4105 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4108 case AMDGPU::G_FCONSTANT:
4109 case AMDGPU::G_CONSTANT:
4110 case AMDGPU::G_GLOBAL_VALUE:
4111 case AMDGPU::G_FRAME_INDEX:
4112 case AMDGPU::G_BLOCK_ADDR:
4113 case AMDGPU::G_READSTEADYCOUNTER:
4114 case AMDGPU::G_READCYCLECOUNTER: {
4115 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4116 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4119 case AMDGPU::G_DYN_STACKALLOC: {
4121 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4123 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4126 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4131 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4132 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4135 case AMDGPU::G_INSERT: {
4140 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4141 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4142 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4143 OpdsMapping[3] = nullptr;
4146 case AMDGPU::G_EXTRACT: {
4150 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4151 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4152 OpdsMapping[2] = nullptr;
4155 case AMDGPU::G_BUILD_VECTOR:
4156 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4157 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4160 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4163 unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
4165 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4166 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4167 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4173 case AMDGPU::G_MERGE_VALUES:
4174 case AMDGPU::G_CONCAT_VECTORS: {
4176 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4177 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4179 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4181 for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
4182 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4185 case AMDGPU::G_BITREVERSE:
4186 case AMDGPU::G_BITCAST:
4187 case AMDGPU::G_INTTOPTR:
4188 case AMDGPU::G_PTRTOINT:
4189 case AMDGPU::G_FABS:
4190 case AMDGPU::G_FNEG: {
4191 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4193 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
4196 case AMDGPU::G_AMDGPU_FFBH_U32:
4197 case AMDGPU::G_AMDGPU_FFBL_B32:
4198 case AMDGPU::G_CTLZ_ZERO_UNDEF:
4199 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
4200 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4202 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4203 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
4206 case AMDGPU::G_CTPOP: {
4207 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4209 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4214 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
4217 case AMDGPU::G_TRUNC: {
4223 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4224 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4227 case AMDGPU::G_ZEXT:
4228 case AMDGPU::G_SEXT:
4229 case AMDGPU::G_ANYEXT:
4230 case AMDGPU::G_SEXT_INREG: {
4239 switch (SrcBank->getID()) {
4240 case AMDGPU::SGPRRegBankID:
4241 DstBank = AMDGPU::SGPRRegBankID;
4244 DstBank = AMDGPU::VGPRRegBankID;
4250 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4251 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(), SrcSize);
4255 case AMDGPU::G_IS_FPCLASS: {
4257 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
4258 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4259 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4260 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4263 case AMDGPU::G_STORE: {
4265 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4270 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4271 OpdsMapping[0] = ValMapping;
4275 case AMDGPU::G_ICMP:
4276 case AMDGPU::G_FCMP: {
4277 unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4282 AMDGPU::SGPRRegBankID);
4286 auto canUseSCCICMP = [&]() {
4289 return Size == 32 ||
4294 auto canUseSCCFCMP = [&]() {
4298 bool isICMP = MI.getOpcode() == AMDGPU::G_ICMP;
4299 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4300 Op2Bank == AMDGPU::SGPRRegBankID &&
4301 Op3Bank == AMDGPU::SGPRRegBankID &&
4302 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4304 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4305 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4309 const unsigned ResultSize = 1;
4311 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4312 OpdsMapping[1] = nullptr;
4313 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
4314 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
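// A compare keeps its result on the SALU (modeled as a 1-bit SGPR mapping,
// i.e. SCC) only when the destination and both sources are already SGPR and
// the scalar compare unit can handle the operation (canUseSCCICMP /
// canUseSCCFCMP); otherwise the result is a VCC bool and both sources are
// mapped to VGPRs.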
4317 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4320 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4321 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4322 unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4324 unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);
4326 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4327 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4330 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
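// For G_EXTRACT_VECTOR_ELT the result bank is the union of the vector's and
// the index's banks, so a dynamic extract only stays scalar when both are
// SGPR; 64-bit results go through the SGPR64-only mapping.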
4333 case AMDGPU::G_INSERT_VECTOR_ELT: {
4335 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4337 unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4338 unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4339 unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
4343 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4344 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4348 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4349 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4352 assert(InsertSize == 32 || InsertSize == 64);
4353 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4357 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4360 case AMDGPU::G_UNMERGE_VALUES: {
4365 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
4367 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
4371 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4372 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4373 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4374 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4375 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4376 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4377 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4378 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4379 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4380 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
4381 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4382 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4383 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4384 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4385 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4386 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4387 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4388 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4389 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4390 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4391 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4392 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4411 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4412 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4413 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4414 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4415 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4416 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4417 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4418 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4419 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4420 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4421 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4422 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4423 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4424 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4425 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4448 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4474 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4475 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4476 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4477 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4478 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4486 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4487 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4488 unsigned ResultBank = regBankUnion(RSrcBank, OffsetBank);
4490 unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4491 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4494 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
4498 case AMDGPU::G_INTRINSIC:
4499 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4503 case Intrinsic::amdgcn_div_fmas:
4504 case Intrinsic::amdgcn_div_fixup:
4505 case Intrinsic::amdgcn_trig_preop:
4506 case Intrinsic::amdgcn_sin:
4507 case Intrinsic::amdgcn_cos:
4508 case Intrinsic::amdgcn_log_clamp:
4509 case Intrinsic::amdgcn_rcp_legacy:
4510 case Intrinsic::amdgcn_rsq_legacy:
4511 case Intrinsic::amdgcn_rsq_clamp:
4512 case Intrinsic::amdgcn_fmul_legacy:
4513 case Intrinsic::amdgcn_fma_legacy:
4514 case Intrinsic::amdgcn_frexp_mant:
4515 case Intrinsic::amdgcn_frexp_exp:
4516 case Intrinsic::amdgcn_fract:
4517 case Intrinsic::amdgcn_cvt_pknorm_i16:
4518 case Intrinsic::amdgcn_cvt_pknorm_u16:
4519 case Intrinsic::amdgcn_cvt_pk_i16:
4520 case Intrinsic::amdgcn_cvt_pk_u16:
4521 case Intrinsic::amdgcn_fmed3:
4522 case Intrinsic::amdgcn_cubeid:
4523 case Intrinsic::amdgcn_cubema:
4524 case Intrinsic::amdgcn_cubesc:
4525 case Intrinsic::amdgcn_cubetc:
4526 case Intrinsic::amdgcn_sffbh:
4527 case Intrinsic::amdgcn_fmad_ftz:
4528 case Intrinsic::amdgcn_mbcnt_lo:
4529 case Intrinsic::amdgcn_mbcnt_hi:
4530 case Intrinsic::amdgcn_mul_u24:
4531 case Intrinsic::amdgcn_mul_i24:
4532 case Intrinsic::amdgcn_mulhi_u24:
4533 case Intrinsic::amdgcn_mulhi_i24:
4534 case Intrinsic::amdgcn_lerp:
4535 case Intrinsic::amdgcn_sad_u8:
4536 case Intrinsic::amdgcn_msad_u8:
4537 case Intrinsic::amdgcn_sad_hi_u8:
4538 case Intrinsic::amdgcn_sad_u16:
4539 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4540 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4541 case Intrinsic::amdgcn_mqsad_u32_u8:
4542 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4543 case Intrinsic::amdgcn_alignbyte:
4544 case Intrinsic::amdgcn_perm:
4545 case Intrinsic::amdgcn_prng_b32:
4546 case Intrinsic::amdgcn_fdot2:
4547 case Intrinsic::amdgcn_sdot2:
4548 case Intrinsic::amdgcn_udot2:
4549 case Intrinsic::amdgcn_sdot4:
4550 case Intrinsic::amdgcn_udot4:
4551 case Intrinsic::amdgcn_sdot8:
4552 case Intrinsic::amdgcn_udot8:
4553 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4554 case Intrinsic::amdgcn_fdot2_f16_f16:
4555 case Intrinsic::amdgcn_fdot2_f32_bf16:
4556 case Intrinsic::amdgcn_fdot2c_f32_bf16:
4557 case Intrinsic::amdgcn_sudot4:
4558 case Intrinsic::amdgcn_sudot8:
4559 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4560 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4561 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4562 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4563 case Intrinsic::amdgcn_cvt_f32_fp8:
4564 case Intrinsic::amdgcn_cvt_f32_bf8:
4565 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4566 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4567 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4568 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4569 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4570 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4571 case Intrinsic::amdgcn_cvt_sr_bf16_f32:
4572 case Intrinsic::amdgcn_cvt_sr_f16_f32:
4573 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
4574 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
4575 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:
4576 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16:
4577 case Intrinsic::amdgcn_cvt_scalef32_f16_fp8:
4578 case Intrinsic::amdgcn_cvt_scalef32_f16_bf8:
4579 case Intrinsic::amdgcn_cvt_scalef32_f32_fp8:
4580 case Intrinsic::amdgcn_cvt_scalef32_f32_bf8:
4581 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f32:
4582 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f32:
4583 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp8:
4584 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_bf8:
4585 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f16:
4586 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_bf16:
4587 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f16:
4588 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_bf16:
4589 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp4:
4590 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f32:
4591 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp4:
4592 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp4:
4593 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_fp6:
4594 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_bf6:
4595 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_bf6:
4596 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_bf6:
4597 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_fp6:
4598 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_fp6:
4599 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_bf8:
4600 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_bf8:
4601 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp8:
4602 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp8:
4603 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f16:
4604 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_bf16:
4605 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
4606 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
4607 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
4608 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
4609 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
4610 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
4611 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
4612 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
4613 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
4614 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_bf16:
4615 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f16:
4616 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f32:
4617 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_bf16:
4618 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f16:
4619 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f32:
4620 case Intrinsic::amdgcn_ashr_pk_i8_i32:
4621 case Intrinsic::amdgcn_ashr_pk_u8_i32:
4622 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:
4623 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32:
4624 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4625 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4626 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4627 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4628 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4629 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4630 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4631 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4632 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4633 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4634 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4635 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4636 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4637 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4638 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4639 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4640 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4641 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4642 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4643 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4644 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4645 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4646 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4647 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4649 case Intrinsic::amdgcn_log:
4650 case Intrinsic::amdgcn_exp2:
4651 case Intrinsic::amdgcn_rcp:
4652 case Intrinsic::amdgcn_rsq:
4653 case Intrinsic::amdgcn_sqrt: {
4654 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4660 case Intrinsic::amdgcn_sbfe:
4661 case Intrinsic::amdgcn_ubfe:
4665 case Intrinsic::amdgcn_ds_swizzle:
4666 case Intrinsic::amdgcn_ds_permute:
4667 case Intrinsic::amdgcn_ds_bpermute:
4668 case Intrinsic::amdgcn_update_dpp:
4669 case Intrinsic::amdgcn_mov_dpp8:
4670 case Intrinsic::amdgcn_mov_dpp:
4671 case Intrinsic::amdgcn_strict_wwm:
4672 case Intrinsic::amdgcn_wwm:
4673 case Intrinsic::amdgcn_strict_wqm:
4674 case Intrinsic::amdgcn_wqm:
4675 case Intrinsic::amdgcn_softwqm:
4676 case Intrinsic::amdgcn_set_inactive:
4677 case Intrinsic::amdgcn_set_inactive_chain_arg:
4678 case Intrinsic::amdgcn_permlane64:
4680 case Intrinsic::amdgcn_cvt_pkrtz:
4684 case Intrinsic::amdgcn_kernarg_segment_ptr:
4685 case Intrinsic::amdgcn_s_getpc:
4686 case Intrinsic::amdgcn_groupstaticsize:
4687 case Intrinsic::amdgcn_reloc_constant:
4688 case Intrinsic::returnaddress: {
4689 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4690 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4693 case Intrinsic::amdgcn_wqm_vote: {
4694 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4695 OpdsMapping[0] = OpdsMapping[2]
4696 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
4699 case Intrinsic::amdgcn_ps_live: {
4700 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4703 case Intrinsic::amdgcn_div_scale: {
4704 unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4705 unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4706 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4707 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4709 unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
4710 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4711 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4714 case Intrinsic::amdgcn_class: {
4715 Register Src0Reg = MI.getOperand(2).getReg();
4716 Register Src1Reg = MI.getOperand(3).getReg();
4717 unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
4718 unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
4719 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4720 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4721 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4722 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4725 case Intrinsic::amdgcn_icmp:
4726 case Intrinsic::amdgcn_fcmp: {
4727 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4729 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4730 unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4731 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4732 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4735 case Intrinsic::amdgcn_readlane: {
4738 unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
4740 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4743 case Intrinsic::amdgcn_readfirstlane: {
4744 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4745 unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4746 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4747 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4750 case Intrinsic::amdgcn_writelane: {
4751 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4753 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
4756 unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
4758 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4762 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4763 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4764 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
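// Lane-access intrinsics: readfirstlane produces an SGPR result from a VGPR
// source; readlane additionally maps the lane index to whatever bank it
// already occupies; writelane produces a VGPR result, with the scalar value
// and lane index kept in their incoming banks and operand 4 (the VGPR being
// written into) mapped to the VGPR bank.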
4767 case Intrinsic::amdgcn_if_break: {
4769 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4770 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4771 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4774 case Intrinsic::amdgcn_permlane16:
4775 case Intrinsic::amdgcn_permlanex16: {
4777 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4778 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4779 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4784 case Intrinsic::amdgcn_permlane16_var:
4785 case Intrinsic::amdgcn_permlanex16_var: {
4787 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4788 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4789 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4790 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4793 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
4794 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
4795 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
4796 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
4797 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
4798 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
4799 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
4800 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
4801 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
4802 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
4803 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
4804 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
4805 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
4806 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
4807 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
4808 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
4809 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
4810 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
4811 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
4812 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
4813 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
4814 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
4815 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
4816 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
4817 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
4818 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
4819 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
4820 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
4821 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
4822 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
4823 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
4824 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
4825 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
4826 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
4827 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
4828 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
4829 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
4830 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
4831 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8:
4832 case Intrinsic::amdgcn_mfma_f32_16x16x32_f16:
4833 case Intrinsic::amdgcn_mfma_f32_32x32x16_f16:
4834 case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
4835 case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
4836 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
4845 Info->mayNeedAGPRs()
4851 Info->mayNeedAGPRs()
4856 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
4857 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
4860 Info->mayNeedAGPRs()
4867 Info->mayNeedAGPRs()
4875 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
4876 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
4877 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
4878 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
4879 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
4880 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
4881 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
4882 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
4883 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
4884 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
4885 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
4886 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
4887 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
4888 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
4889 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
4890 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
4891 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
4892 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
4893 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
4894 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
4895 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
4896 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
4897 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
4898 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
4899 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
4900 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
4901 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
4902 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
4911 case Intrinsic::amdgcn_interp_p1:
4912 case Intrinsic::amdgcn_interp_p2:
4913 case Intrinsic::amdgcn_interp_mov:
4914 case Intrinsic::amdgcn_interp_p1_f16:
4915 case Intrinsic::amdgcn_interp_p2_f16:
4916 case Intrinsic::amdgcn_lds_param_load: {
4917 const int M0Idx = MI.getNumOperands() - 1;
4918 Register M0Reg = MI.getOperand(M0Idx).getReg();
4920 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4922 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4923 for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
4924 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4928 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
4931 case Intrinsic::amdgcn_interp_inreg_p10:
4932 case Intrinsic::amdgcn_interp_inreg_p2:
4933 case Intrinsic::amdgcn_interp_inreg_p10_f16:
4934 case Intrinsic::amdgcn_interp_inreg_p2_f16:
4935 case Intrinsic::amdgcn_interp_p10_rtz_f16:
4936 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
4937 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4938 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4939 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4940 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4941 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4944 case Intrinsic::amdgcn_permlane16_swap:
4945 case Intrinsic::amdgcn_permlane32_swap: {
4946 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4947 OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] =
4948 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4951 case Intrinsic::amdgcn_ballot: {
4952 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4953 unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4954 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4955 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
4958 case Intrinsic::amdgcn_inverse_ballot: {
4960 Register MaskReg = MI.getOperand(2).getReg();
4961 unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
4962 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
4963 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4964 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4967 case Intrinsic::amdgcn_bitop3: {
4969 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4970 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4971 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4972 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4975 case Intrinsic::amdgcn_s_quadmask:
4976 case Intrinsic::amdgcn_s_wqm: {
4977 Register MaskReg = MI.getOperand(2).getReg();
4978 unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
4979 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
4980 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
4981 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4984 case Intrinsic::amdgcn_wave_reduce_umin:
4985 case Intrinsic::amdgcn_wave_reduce_umax: {
4986 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4987 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4988 unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4991 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
4994 case Intrinsic::amdgcn_s_bitreplicate:
4995 Register MaskReg = MI.getOperand(2).getReg();
4996 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
4997 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
4998 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
5002 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
5003 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
5004 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
5005 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
5006 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
5009 assert(RSrcIntrin && "missing RsrcIntrinsic for image intrinsic");
5016 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
5017 unsigned N = MI.getNumExplicitOperands() - 2;
5018 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
5022 unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
5025 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5028 for (unsigned I = 2; I < N; ++I) {
5029 unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
5030 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5035 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
5036 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
5037 auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
5039 case Intrinsic::amdgcn_s_getreg:
5040 case Intrinsic::amdgcn_s_memtime:
5041 case Intrinsic::amdgcn_s_memrealtime:
5042 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
5043 case Intrinsic::amdgcn_s_sendmsg_rtn: {
5044 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5045 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
5048 case Intrinsic::amdgcn_global_atomic_csub:
5049 case Intrinsic::amdgcn_global_atomic_fmin_num:
5050 case Intrinsic::amdgcn_global_atomic_fmax_num:
5051 case Intrinsic::amdgcn_flat_atomic_fmin_num:
5052 case Intrinsic::amdgcn_flat_atomic_fmax_num:
5053 case Intrinsic::amdgcn_atomic_cond_sub_u32:
5054 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
5055 case Intrinsic::amdgcn_global_load_tr_b64:
5056 case Intrinsic::amdgcn_global_load_tr_b128:
5057 case Intrinsic::amdgcn_ds_read_tr4_b64:
5058 case Intrinsic::amdgcn_ds_read_tr6_b96:
5059 case Intrinsic::amdgcn_ds_read_tr8_b64:
5060 case Intrinsic::amdgcn_ds_read_tr16_b64:
5062 case Intrinsic::amdgcn_ds_ordered_add:
5063 case Intrinsic::amdgcn_ds_ordered_swap: {
5064 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5065 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5067 AMDGPU::SGPRRegBankID);
5068 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
5069 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5072 case Intrinsic::amdgcn_ds_append:
5073 case Intrinsic::amdgcn_ds_consume: {
5074 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5075 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5079 case Intrinsic::amdgcn_exp_compr:
5080 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5081 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5083 case Intrinsic::amdgcn_exp:
5085 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5086 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5087 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5088 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5090 case Intrinsic::amdgcn_exp_row:
5091 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5092 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5093 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5094 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5097 case Intrinsic::amdgcn_s_sendmsg:
5098 case Intrinsic::amdgcn_s_sendmsghalt: {
5101 AMDGPU::SGPRRegBankID);
5102 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5105 case Intrinsic::amdgcn_s_setreg: {
5108 AMDGPU::SGPRRegBankID);
5109 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5112 case Intrinsic::amdgcn_s_ttracedata: {
5116 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5119 case Intrinsic::amdgcn_end_cf: {
5121 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
5124 case Intrinsic::amdgcn_else: {
5126 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5127 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5128 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5131 case Intrinsic::amdgcn_init_whole_wave:
5132 case Intrinsic::amdgcn_live_mask: {
5133 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5136 case Intrinsic::amdgcn_wqm_demote:
5137 case Intrinsic::amdgcn_kill: {
5138 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5141 case Intrinsic::amdgcn_raw_buffer_load:
5142 case Intrinsic::amdgcn_raw_ptr_buffer_load:
5143 case Intrinsic::amdgcn_raw_atomic_buffer_load:
5144 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
5145 case Intrinsic::amdgcn_raw_tbuffer_load:
5146 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
5155 case Intrinsic::amdgcn_raw_buffer_load_lds:
5156 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
5163 case Intrinsic::amdgcn_raw_buffer_store:
5164 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5165 case Intrinsic::amdgcn_raw_buffer_store_format:
5166 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5167 case Intrinsic::amdgcn_raw_tbuffer_store:
5168 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5175 case Intrinsic::amdgcn_struct_buffer_load:
5176 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5177 case Intrinsic::amdgcn_struct_tbuffer_load:
5178 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5179 case Intrinsic::amdgcn_struct_atomic_buffer_load:
5180 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
5188 case Intrinsic::amdgcn_struct_buffer_load_lds:
5189 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
5197 case Intrinsic::amdgcn_struct_buffer_store:
5198 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5199 case Intrinsic::amdgcn_struct_tbuffer_store:
5200 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5208 case Intrinsic::amdgcn_init_exec_from_input: {
5210 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
5213 case Intrinsic::amdgcn_ds_gws_init:
5214 case Intrinsic::amdgcn_ds_gws_barrier:
5215 case Intrinsic::amdgcn_ds_gws_sema_br: {
5216 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5220 AMDGPU::SGPRRegBankID);
5221 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5224 case Intrinsic::amdgcn_ds_gws_sema_v:
5225 case Intrinsic::amdgcn_ds_gws_sema_p:
5226 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5229 AMDGPU::SGPRRegBankID);
5230 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5233 case Intrinsic::amdgcn_global_load_lds: {
5238 case Intrinsic::amdgcn_lds_direct_load: {
5239 const int M0Idx = MI.getNumOperands() - 1;
5240 Register M0Reg = MI.getOperand(M0Idx).getReg();
5242 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5244 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5245 for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
5246 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5250 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5253 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5254 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5258 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
5271 case Intrinsic::amdgcn_s_sleep_var:
5274 case Intrinsic::amdgcn_s_barrier_join:
5275 case Intrinsic::amdgcn_s_wakeup_barrier:
5278 case Intrinsic::amdgcn_s_barrier_init:
5279 case Intrinsic::amdgcn_s_barrier_signal_var:
5283 case Intrinsic::amdgcn_s_barrier_signal_isfirst: {
5284 const unsigned ResultSize = 1;
5286 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5289 case Intrinsic::amdgcn_s_get_barrier_state:
5290 case Intrinsic::amdgcn_s_get_named_barrier_state: {
5295 case Intrinsic::amdgcn_pops_exiting_wave_id:
5297 case Intrinsic::amdgcn_s_prefetch_data: {
5307 case AMDGPU::G_SELECT: {
5308 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5310 AMDGPU::SGPRRegBankID);
5312 AMDGPU::SGPRRegBankID);
5313 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5314 Op3Bank == AMDGPU::SGPRRegBankID;
5316 unsigned CondBankDefault = SGPRSrcs ?
5317 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5320 if (CondBank == AMDGPU::SGPRRegBankID)
5321 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5322 else if (CondBank == AMDGPU::VGPRRegBankID)
5323 CondBank = AMDGPU::VCCRegBankID;
5325 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5326 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5328 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5332 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
5333 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5334 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
5335 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
5337 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
5338 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5339 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
5340 OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
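// G_SELECT: the condition can stay in an SGPR (a uniform, SCC-based select)
// only when both value operands are already SGPR; any other condition is
// normalized to VCC and the selected values are forced into VGPRs. The first
// group of mappings above apparently covers 64-bit selects via the
// SGPR64-only mappings, the second the plain 32-bit case.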
5346 case AMDGPU::G_SI_CALL: {
5347 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5353 for (unsigned I = 4; I < MI.getNumOperands(); ++I) {
5354 if (MI.getOperand(I).isReg()) {
5358 OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);
5363 case AMDGPU::G_LOAD:
5364 case AMDGPU::G_ZEXTLOAD:
5365 case AMDGPU::G_SEXTLOAD:
5368 case AMDGPU::G_ATOMICRMW_XCHG:
5369 case AMDGPU::G_ATOMICRMW_ADD:
5370 case AMDGPU::G_ATOMICRMW_SUB:
5371 case AMDGPU::G_ATOMICRMW_AND:
5372 case AMDGPU::G_ATOMICRMW_OR:
5373 case AMDGPU::G_ATOMICRMW_XOR:
5374 case AMDGPU::G_ATOMICRMW_MAX:
5375 case AMDGPU::G_ATOMICRMW_MIN:
5376 case AMDGPU::G_ATOMICRMW_UMAX:
5377 case AMDGPU::G_ATOMICRMW_UMIN:
5378 case AMDGPU::G_ATOMICRMW_FADD:
5379 case AMDGPU::G_ATOMICRMW_FMIN:
5380 case AMDGPU::G_ATOMICRMW_FMAX:
5381 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5382 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5383 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
5389 case AMDGPU::G_ATOMIC_CMPXCHG: {
5396 case AMDGPU::G_BRCOND: {
5398 AMDGPU::SGPRRegBankID);
5399 assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
5400 if (Bank != AMDGPU::SGPRRegBankID)
5401 Bank = AMDGPU::VCCRegBankID;
5403 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
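// G_BRCOND: the 1-bit branch condition must be either an SGPR (uniform
// branch on SCC) or a VCC bool; anything not already in an SGPR is treated
// as VCC.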
5406 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
5408 case AMDGPU::G_PREFETCH:
5415 MI.getNumOperands());
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
static bool substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)
static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
static Register constrainRegToBank(MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &Reg, const RegisterBank &Bank)
static std::pair< Register, Register > unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode)
static void extendLow32IntoHigh32(MachineIRBuilder &B, Register Hi32Reg, Register Lo32Reg, unsigned ExtOpc, const RegisterBank &RegBank, bool IsBooleanSrc=false)
Implement extending a 32-bit value to a 64-bit value.
static unsigned getExtendOp(unsigned Opc)
static bool isVectorRegisterBank(const RegisterBank &Bank)
static unsigned regBankUnion(unsigned RB0, unsigned RB1)
static std::pair< LLT, LLT > splitUnequalType(LLT Ty, unsigned FirstSize)
Split Ty into 2 pieces.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, unsigned ConstOffset)
Utility function for pushing dynamic vector indexes with a constant offset into waterfall loops.
static LLT widen96To128(LLT Ty)
static LLT getHalfSizedType(LLT Ty)
static unsigned getSBufferLoadCorrespondingBufferLoadOpcode(unsigned Opc)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static bool isReg(const MCInst &MI, unsigned OpNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static constexpr Register SPReg
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isUniformMMO(const MachineMemOperand *MMO)
bool applyMappingDynStackAlloc(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
std::pair< Register, unsigned > splitBufferOffsets(MachineIRBuilder &B, Register Offset) const
bool collectWaterfallOperands(SmallSet< Register, 4 > &SGPROperandRegs, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef< unsigned > OpIndices) const
const InstructionMapping & getImageMapping(const MachineRegisterInfo &MRI, const MachineInstr &MI, int RsrcIdx) const
InstructionMappings addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI, const std::array< unsigned, NumOps > RegSrcOpIdx, ArrayRef< OpRegBankEntry< NumOps > > Table) const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const
bool executeInWaterfallLoop(MachineIRBuilder &B, iterator_range< MachineBasicBlock::iterator > Range, SmallSet< Register, 4 > &SGPROperandRegs) const
Legalize instruction MI where operands in OpIndices must be SGPRs.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
AMDGPURegisterBankInfo(const GCNSubtarget &STI)
bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI, unsigned Default=AMDGPU::VGPRRegBankID) const
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const
Handle register layout difference for f16 images for some subtargets.
const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const
void applyMappingImpl(MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const override
See RegisterBankInfo::applyMapping.
bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper, bool Signed) const
bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI, const OperandsMapper &OpdMapper, int RSrcIdx) const
const ValueMapping * getVGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool isScalarLoadLegal(const MachineInstr &MI) const
unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg, Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const
const ValueMapping * getSGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
void split64BitValueForMapping(MachineIRBuilder &B, SmallVector< Register, 2 > &Regs, LLT HalfTy, Register Reg) const
Split 64-bit value Reg into two 32-bit halves and populate them into Regs.
const ValueMapping * getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const
Return the mapping for a pointer argument.
unsigned getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsic(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isDivergentRegBank(const RegisterBank *RB) const override
Returns true if the register bank is considered divergent.
void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const InstructionMapping & getDefaultMappingSOP(const MachineInstr &MI) const
const InstructionMapping & getDefaultMappingAllVGPR(const MachineInstr &MI) const
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
const ValueMapping * getAGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
const GCNSubtarget & Subtarget
const InstructionMapping & getDefaultMappingVOP(const MachineInstr &MI) const
bool isSALUMapping(const MachineInstr &MI) const
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const
bool applyMappingSBufferLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
void applyMappingSMULU64(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
const SIRegisterInfo * TRI
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool hasScalarCompareEq64() const
bool hasScalarSubwordLoads() const
bool hasFullRate64Ops() const
bool hasScalarDwordx3Loads() const
bool hasScalarMulHiInsts() const
bool hasPseudoScalarTrans() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasSALUFloatInsts() const
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void createdInstr(MachineInstr &MI)=0
An instruction has been created and inserted into the function.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT getScalarType() const
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
constexpr LLT divide(int Factor) const
Return a type that is Factor times smaller.
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
TypeSize getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value,.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Helper class that represents how the value of an instruction may be mapped and what is the related co...
bool isValid() const
Check whether this object is valid.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
const InstructionMapping & getInstrMapping() const
The final mapping of the instruction.
MachineInstr & getMI() const
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
const unsigned * Sizes
Hold the sizes of the register banks for all HwModes.
bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, TypeSize Size) const
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
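A short sketch of the SmallSet interface above; the helper function and element type are assumptions chosen only to show count/insert together.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/Register.h"
using namespace llvm;

// Sketch: deduplicate registers with no heap allocation for small counts.
static void visitUnique(ArrayRef<Register> Regs) {
  SmallSet<Register, 4> Seen;   // Inline storage for up to 4 elements.
  for (Register R : Regs) {
    if (Seen.count(R))          // 1 if already present, 0 otherwise.
      continue;
    Seen.insert(R);             // Returns {iterator, inserted-bool}.
    // ... process R once ...
  }
}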
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static IntegerType * getInt32Ty(LLVMContext &C)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
self_iterator getIterator()
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
bool isFlatGlobalAddrSpace(unsigned AS)
bool isExtendedGlobalAddrSpace(unsigned AS)
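A hedged example of querying the address-space predicates above; the helper name is hypothetical, and the AMDGPU namespace qualification and MachineMemOperand::getAddrSpace() call are assumptions based on how these utilities are used elsewhere in the backend.
// Sketch: treat flat/global/constant address spaces as globally visible;
// LDS (LOCAL_ADDRESS) and scratch (PRIVATE_ADDRESS) are excluded.
static bool mayAliasGlobalMemory(const MachineMemOperand &MMO) {
  unsigned AS = MMO.getAddrSpace();
  return AMDGPU::isFlatGlobalAddrSpace(AS) ||
         AMDGPU::isExtendedGlobalAddrSpace(AS);
}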
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelKnownBits *KnownBits=nullptr, bool CheckNUW=false)
Returns the base register and constant offset.
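A small sketch of splitting an address into base plus constant offset with the helper above; the AMDGPU namespace qualification, the 20-bit width check, and the folding step are illustrative assumptions.
// Sketch: peel a constant offset off an address before matching an
// addressing mode.
static void foldConstantOffset(MachineRegisterInfo &MRI, Register Addr) {
  auto [Base, ImmOffset] = AMDGPU::getBaseWithConstantOffset(MRI, Addr);
  if (isUInt<20>(ImmOffset)) {
    // Use Base as the register operand and ImmOffset as the immediate
    // (target-specific encoding omitted).
  }
}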
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
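The matchers above compose through mi_match. A hedged sketch, assuming the register-binding form of m_Reg that takes a Register reference; the function itself is hypothetical.
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
using namespace llvm;
using namespace MIPatternMatch;

// Sketch: recognize "Reg = G_ADD Base, <constant>" and capture both pieces.
// m_GAdd matches commutatively, so the constant may appear on either side.
static bool matchAddOfConstant(Register Reg, const MachineRegisterInfo &MRI,
                               Register &Base, APInt &Cst) {
  return mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Cst)));
}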
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
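A minimal usage sketch; the choice of G_IMPLICIT_DEF as the opcode to look for is illustrative only.
// Sketch: check whether Reg is produced (looking through copies) by a
// G_IMPLICIT_DEF, a common "don't care" pattern in GlobalISel code.
if (MachineInstr *Def =
        getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Reg, MRI)) {
  // Reg's value is undefined; callers may pick any convenient lowering.
  (void)Def;
}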
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns its value.
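A hedged fragment showing the usual pattern around this helper; the 16-bit immediate check is an assumed, target-agnostic placeholder.
// Sketch: fold a known-constant operand into an immediate form if it fits.
if (std::optional<int64_t> Cst = getIConstantVRegSExtVal(Reg, MRI)) {
  if (isInt<16>(*Cst)) {
    // Emit the immediate form (target-specific code omitted).
  }
}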
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
auto reverse(ContainerTy &&C)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
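A short sketch of call_once together with the once_flag it pairs with; the function name and the setup body are assumptions.
#include "llvm/Support/Threading.h"

// Sketch: run one-time, thread-safe initialization.
static llvm::once_flag InitFlag;
static void ensureInitialized() {
  llvm::call_once(InitFlag, [] {
    // Expensive setup runs exactly once, even with concurrent callers.
  });
}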
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its...
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
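A small sketch combining assumeAligned and Log2; the encoding helper is hypothetical, shown only because alignments are frequently stored as log2 fields.
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Sketch: normalize a possibly-zero alignment and encode it as a log2 value.
static unsigned encodeAlign(uint64_t RawAlign) {
  Align A = assumeAligned(RawAlign); // 0 is treated as 1.
  return Log2(A);                    // e.g. Align(16) -> 4.
}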
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
const RegisterBank * RegBank
Register bank where the partial value lives.
unsigned Length
Length of this mapping in bits.
Helper struct that represents how a value is mapped through different register banks.
unsigned NumBreakDowns
Number of partial mappings used to break down this value.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
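The two structs above compose as follows. A hedged sketch that assumes constructors taking the listed fields and a RegisterBank reference named VGPRBank; real code normally obtains these through the uniquing getters rather than constructing them directly, and the 64-bit split is purely illustrative.
// Sketch: describe a 64-bit value held as two 32-bit halves in one bank.
const RegisterBankInfo::PartialMapping Parts[2] = {
    {/*StartIdx=*/0,  /*Length=*/32, VGPRBank},   // low half
    {/*StartIdx=*/32, /*Length=*/32, VGPRBank}};  // high half
const RegisterBankInfo::ValueMapping Split{Parts, /*NumBreakDowns=*/2};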
The llvm::once_flag structure.