84#include "llvm/IR/IntrinsicsAMDGPU.h"
86#define GET_TARGET_REGBANK_IMPL
87#include "AMDGPUGenRegisterBank.inc"
90#include "AMDGPUGenRegisterBankInfo.def"
93using namespace MIPatternMatch;
109 :
B(B), RBI(RBI_),
MRI(MRI_), NewBank(RB) {
110 assert(!B.isObservingChanges());
111 B.setChangeObserver(*
this);
114 ~ApplyRegBankMapping()
override {
118 B.stopObservingChanges();
123 const unsigned Opc =
MI.getOpcode();
124 if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
125 Opc == AMDGPU::G_SEXT) {
132 if (SrcBank == &AMDGPU::VCCRegBank) {
136 assert(NewBank == &AMDGPU::VGPRRegBank);
140 B.setInsertPt(*
MI.getParent(),
MI);
142 auto True = B.buildConstant(
S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
143 auto False = B.buildConstant(
S32, 0);
144 B.buildSelect(DstReg, SrcReg, True, False);
145 MRI.setRegBank(True.getReg(0), *NewBank);
146 MRI.setRegBank(False.getReg(0), *NewBank);
147 MI.eraseFromParent();
150 assert(!
MRI.getRegClassOrRegBank(DstReg));
151 MRI.setRegBank(DstReg, *NewBank);
156 if (Opc == AMDGPU::G_TRUNC) {
159 assert(DstBank != &AMDGPU::VCCRegBank);
169 if (Reg.isPhysical() ||
MRI.getRegClassOrRegBank(Reg))
174 assert(NewBank == &AMDGPU::VGPRRegBank &&
175 "s1 operands should only be used for vector bools");
176 assert((
MI.getOpcode() != AMDGPU::G_TRUNC &&
177 MI.getOpcode() != AMDGPU::G_ANYEXT) &&
178 "not expecting legalization artifacts here");
179 RB = &AMDGPU::VCCRegBank;
182 MRI.setRegBank(Reg, *RB);
205 : Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
206 TII(Subtarget.getInstrInfo()) {
211 static auto InitializeRegisterBankOnce = [
this]() {
213 &
getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
214 &
getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
218 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
222 unsigned BankID = Bank.
getID();
223 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
227 return RB != &AMDGPU::SGPRRegBank;
234 if (Dst.getID() == AMDGPU::SGPRRegBankID &&
236 return std::numeric_limits<unsigned>::max();
247 (Dst.getID() == AMDGPU::SGPRRegBankID) &&
249 Src.getID() == AMDGPU::SGPRRegBankID ||
250 Src.getID() == AMDGPU::VCCRegBankID))
251 return std::numeric_limits<unsigned>::max();
254 if (Dst.getID() == AMDGPU::AGPRRegBankID &&
255 Src.getID() == AMDGPU::AGPRRegBankID)
289 if (&RC == &AMDGPU::SReg_1RegClass)
290 return AMDGPU::VCCRegBank;
299 return AMDGPU::SGPRRegBank;
301 return Ty ==
LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
304 return TRI->
isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
307template <
unsigned NumOps>
311 const std::array<unsigned, NumOps> RegSrcOpIdx,
318 unsigned Sizes[NumOps];
319 for (
unsigned I = 0;
I < NumOps; ++
I) {
320 Register Reg =
MI.getOperand(RegSrcOpIdx[
I]).getReg();
324 for (
unsigned I = 0, E =
MI.getNumExplicitDefs();
I != E; ++
I) {
326 Operands[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
330 unsigned MappingID = 2;
331 for (
const auto &Entry : Table) {
332 for (
unsigned I = 0;
I < NumOps; ++
I) {
333 int OpIdx = RegSrcOpIdx[
I];
334 Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[
I],
Sizes[
I]);
349 case Intrinsic::amdgcn_readlane: {
352 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
355 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
358 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
359 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
361 case Intrinsic::amdgcn_writelane: {
364 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
367 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
370 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
373 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
377 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
378 return addMappingFromTable<4>(
MI,
MRI, RegSrcOpIdx, Table);
390 case Intrinsic::amdgcn_s_buffer_load: {
393 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
396 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
399 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
402 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
406 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
407 return addMappingFromTable<2>(
MI,
MRI, RegSrcOpIdx, Table);
409 case Intrinsic::amdgcn_ds_ordered_add:
410 case Intrinsic::amdgcn_ds_ordered_swap: {
414 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
417 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
420 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
421 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
423 case Intrinsic::amdgcn_s_sendmsg:
424 case Intrinsic::amdgcn_s_sendmsghalt: {
428 { { AMDGPU::SGPRRegBankID }, 1 },
431 { { AMDGPU::VGPRRegBankID }, 3 }
434 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
435 return addMappingFromTable<1>(
MI,
MRI, RegSrcOpIdx, Table);
445 if (!
MI.hasOneMemOperand())
478 switch (
MI.getOpcode()) {
479 case TargetOpcode::G_CONSTANT:
480 case TargetOpcode::G_IMPLICIT_DEF: {
484 { { AMDGPU::VGPRRegBankID }, 1 },
485 { { AMDGPU::SGPRRegBankID }, 1 },
486 { { AMDGPU::VCCRegBankID }, 1 }
489 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
494 case TargetOpcode::G_FCONSTANT:
495 case TargetOpcode::G_FRAME_INDEX:
496 case TargetOpcode::G_GLOBAL_VALUE: {
498 { { AMDGPU::VGPRRegBankID }, 1 },
499 { { AMDGPU::SGPRRegBankID }, 1 }
502 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
504 case TargetOpcode::G_AND:
505 case TargetOpcode::G_OR:
506 case TargetOpcode::G_XOR: {
513 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
514 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
515 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
521 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
522 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
523 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size)}),
534 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
535 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
536 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
542 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
543 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
544 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
549 case TargetOpcode::G_LOAD:
550 case TargetOpcode::G_ZEXTLOAD:
551 case TargetOpcode::G_SEXTLOAD: {
553 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
562 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
563 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
571 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
572 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
585 case TargetOpcode::G_SELECT: {
589 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
590 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
591 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
597 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
598 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
599 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
605 case TargetOpcode::G_UADDE:
606 case TargetOpcode::G_USUBE:
607 case TargetOpcode::G_SADDE:
608 case TargetOpcode::G_SSUBE: {
612 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
613 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
614 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
615 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
616 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
622 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
623 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
624 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
625 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
630 case AMDGPU::G_BRCOND: {
631 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
636 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr}),
642 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr }),
647 case AMDGPU::G_INTRINSIC:
648 case AMDGPU::G_INTRINSIC_CONVERGENT:
650 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
651 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
666 Register LoLHS =
MRI->createGenericVirtualRegister(HalfTy);
667 Register HiLHS =
MRI->createGenericVirtualRegister(HalfTy);
669 MRI->setRegBank(LoLHS, *Bank);
670 MRI->setRegBank(HiLHS, *Bank);
675 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
686 MRI.setType(Reg, NewTy);
706 LLT Ty =
MRI.getType(Src);
709 if (Bank == &AMDGPU::SGPRRegBank)
715 if (Bank != &AMDGPU::VGPRRegBank) {
717 Src =
B.buildCopy(Ty, Src).getReg(0);
718 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
722 unsigned NumParts = Bits / 32;
729 auto Unmerge =
B.buildUnmerge(
S32, Src);
730 for (
unsigned i = 0; i < NumParts; ++i)
734 for (
unsigned i = 0; i < NumParts; ++i) {
736 Register DstPart =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
737 MRI.setType(DstPart, NumParts == 1 ? Ty :
S32);
742 assert(Constrained &&
"Failed to constrain readfirstlane src reg");
744 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
752 Register Dst =
B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
753 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
786 const unsigned MovExecOpc =
788 const unsigned MovExecTermOpc =
792 AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
794 AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
796 AMDGPU::EXEC_LO : AMDGPU::EXEC;
799 const int OrigRangeSize = std::distance(
Range.begin(),
Range.end());
803 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
804 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
807 B.buildInstr(TargetOpcode::IMPLICIT_DEF)
808 .addDef(InitSaveExecReg);
810 Register PhiExec =
MRI.createVirtualRegister(WaveRC);
811 Register NewExec =
MRI.createVirtualRegister(WaveRC);
837 B.setInsertPt(*LoopBB, LoopBB->
end());
839 B.buildInstr(TargetOpcode::PHI)
841 .addReg(InitSaveExecReg)
856 auto NewEnd = BodyBB->
end();
863 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
868 if (!SGPROperandRegs.
count(OldReg))
873 auto OldVal = WaterfalledRegMap.
find(OldReg);
874 if (OldVal != WaterfalledRegMap.
end()) {
875 Op.setReg(OldVal->second);
880 LLT OpTy =
MRI.getType(OpReg);
883 if (OpBank != &AMDGPU::VGPRRegBank) {
886 OpReg =
B.buildCopy(OpTy, OpReg).getReg(0);
887 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
895 bool Is64 = OpSize % 64 == 0;
896 unsigned PartSize = Is64 ? 64 : 32;
898 unsigned NumParts = OpSize / PartSize;
904 CurrentLaneParts.
push_back(CurrentLaneReg);
906 auto UnmergeOp =
B.buildUnmerge(PartTy, OpReg);
907 auto UnmergeCurrentLane =
B.buildUnmerge(PartTy, CurrentLaneReg);
908 for (
unsigned i = 0; i < NumParts; ++i) {
910 CurrentLaneParts.
push_back(UnmergeCurrentLane.getReg(i));
911 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
912 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
916 for (
unsigned i = 0; i < NumParts; ++i) {
918 OpParts[i]).getReg(0);
919 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
924 CondReg =
B.buildAnd(
S1, CondReg, CmpReg).getReg(0);
925 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
929 Op.setReg(CurrentLaneReg);
932 WaterfalledRegMap.
insert(std::pair(OldReg,
Op.getReg()));
937 CondReg =
B.buildIntrinsic(Intrinsic::amdgcn_ballot,
941 MRI.setRegClass(CondReg, WaveRC);
944 B.buildInstr(AndSaveExecOpc)
948 MRI.setSimpleHint(NewExec, CondReg);
950 B.setInsertPt(*BodyBB, BodyBB->
end());
953 B.buildInstr(XorTermOpc)
962 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
969 B.setMBB(*RestoreExecBB);
970 B.buildInstr(MovExecTermOpc)
972 .addReg(SaveExecReg);
976 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
988 for (
unsigned Op : OpIndices) {
992 if (OpBank->
getID() != AMDGPU::SGPRRegBankID)
993 SGPROperandRegs.
insert(Reg);
997 return !SGPROperandRegs.
empty();
1017 Register Reg =
MI.getOperand(OpIdx).getReg();
1020 if (Bank == &AMDGPU::SGPRRegBank)
1024 MI.getOperand(OpIdx).setReg(Reg);
1036 assert(FirstSize % EltSize == 0);
1038 unsigned FirstPartNumElts = FirstSize / EltSize;
1039 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1060 const LLT LoadTy =
MRI.getType(DstReg);
1063 const unsigned MaxNonSmrdLoadSize = 128;
1067 if (DstBank == &AMDGPU::SGPRRegBank) {
1078 if (LoadSize == 32 &&
1082 if (LoadSize == 32 &&
1091 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
1093 if (LoadSize == 32) {
1097 if (
MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
1099 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1100 B.buildSExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1101 }
else if (
MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
1103 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1104 B.buildZExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1107 B.buildLoadFromOffset(
MI.getOperand(0), PtrReg, *MMO, 0);
1121 auto WideLoad =
B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
1123 B.buildTrunc(
MI.getOperand(0), WideLoad);
1125 B.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(),
1130 MI.eraseFromParent();
1135 if (LoadSize <= MaxNonSmrdLoadSize)
1141 if (SrcRegs.
empty())
1147 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
1148 MRI.setType(BasePtrReg, PtrTy);
1154 assert(LoadSize % MaxNonSmrdLoadSize == 0);
1155 unsigned NumSplitParts = LoadTy.
getSizeInBits() / MaxNonSmrdLoadSize;
1156 const LLT LoadSplitTy = LoadTy.
divide(NumSplitParts);
1157 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1169 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
1180 const auto &TFI = *ST.getFrameLowering();
1185 "Stack grows upwards for AMDGPU");
1188 Register AllocSize =
MI.getOperand(1).getReg();
1194 if (SizeBank != &AMDGPU::SGPRRegBank)
1197 LLT PtrTy =
MRI.getType(Dst);
1202 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1204 auto WaveSize =
B.buildConstant(
LLT::scalar(32), ST.getWavefrontSizeLog2());
1205 auto ScaledSize =
B.buildShl(IntPtrTy, AllocSize, WaveSize);
1207 auto OldSP =
B.buildCopy(PtrTy,
SPReg);
1208 if (Alignment > TFI.getStackAlign()) {
1209 auto StackAlignMask = (Alignment.
value() << ST.getWavefrontSizeLog2()) - 1;
1210 auto Tmp1 =
B.buildPtrAdd(PtrTy, OldSP,
1212 B.buildMaskLowPtrBits(Dst, Tmp1,
1213 Log2(Alignment) + ST.getWavefrontSizeLog2());
1215 B.buildCopy(Dst, OldSP);
1217 auto PtrAdd =
B.buildPtrAdd(PtrTy, Dst, ScaledSize);
1218 B.buildCopy(
SPReg, PtrAdd);
1219 MI.eraseFromParent();
1226 int RsrcIdx)
const {
1227 const int NumDefs =
MI.getNumExplicitDefs();
1231 RsrcIdx += NumDefs + 1;
1238 for (
int I = NumDefs, NumOps =
MI.getNumOperands();
I != NumOps; ++
I) {
1239 if (!
MI.getOperand(
I).isReg())
1243 if (
I == RsrcIdx ||
I == RsrcIdx + 1)
1255 Register &SOffsetReg, int64_t &InstOffsetVal,
Align Alignment)
const {
1259 if (std::optional<int64_t> Imm =
1263 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1264 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1265 InstOffsetVal = ImmOffset;
1267 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1268 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1269 return SOffset + ImmOffset;
1284 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1285 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1286 InstOffsetVal = ImmOffset;
1292 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1293 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1295 InstOffsetVal = ImmOffset;
1309 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
1315 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
1325 VOffsetReg = CombinedOffset;
1327 VOffsetReg =
B.buildCopy(
S32, CombinedOffset).getReg(0);
1328 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1331 SOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1332 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1338 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
1339 return AMDGPU::G_AMDGPU_BUFFER_LOAD;
1340 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
1341 return AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
1342 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
1343 return AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
1344 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
1345 return AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
1346 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
1347 return AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
1361 LLT Ty =
MRI.getType(Dst);
1367 if (RSrcBank == &AMDGPU::SGPRRegBank &&
1368 OffsetBank == &AMDGPU::SGPRRegBank)
1376 if (LoadSize == 256 || LoadSize == 512) {
1377 NumLoads = LoadSize / 128;
1378 Ty = Ty.
divide(NumLoads);
1383 const Align Alignment = NumLoads > 1 ?
Align(16 * NumLoads) :
Align(1);
1389 int64_t ImmOffset = 0;
1392 SOffset, ImmOffset, Alignment);
1397 const Align MemAlign(4);
1411 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
1418 for (
int i = 0; i < NumLoads; ++i) {
1419 if (NumLoads == 1) {
1422 LoadParts[i] =
MRI.createGenericVirtualRegister(Ty);
1423 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
1431 .addDef(LoadParts[i])
1436 .addImm(ImmOffset + 16 * i)
1439 .addMemOperand(MMO);
1445 if (RSrcBank != &AMDGPU::SGPRRegBank) {
1448 B.setInstr(*Span.
begin());
1449 MI.eraseFromParent();
1453 OpsToWaterfall.
insert(RSrc);
1458 if (NumLoads != 1) {
1460 B.buildConcatVectors(Dst, LoadParts);
1462 B.buildMergeLikeInstr(Dst, LoadParts);
1466 if (RSrcBank == &AMDGPU::SGPRRegBank)
1467 MI.eraseFromParent();
1482 LLT Ty =
MRI.getType(DstReg);
1486 unsigned FirstOpnd = isa<GIntrinsic>(
MI) ? 2 : 1;
1487 Register SrcReg =
MI.getOperand(FirstOpnd).getReg();
1488 Register OffsetReg =
MI.getOperand(FirstOpnd + 1).getReg();
1489 Register WidthReg =
MI.getOperand(FirstOpnd + 2).getReg();
1493 if (DstBank == &AMDGPU::VGPRRegBank) {
1499 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1503 auto ShiftOffset =
Signed ?
B.buildAShr(
S64, SrcReg, OffsetReg)
1504 :
B.buildLShr(
S64, SrcReg, OffsetReg);
1505 auto UnmergeSOffset =
B.buildUnmerge({
S32,
S32}, ShiftOffset);
1512 auto Zero =
B.buildConstant(
S32, 0);
1513 auto WidthImm = ConstWidth->Value.getZExtValue();
1514 if (WidthImm <= 32) {
1518 Signed ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
1519 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
1521 Signed ?
B.buildAShr(
S32, Extract,
B.buildConstant(
S32, 31)) : Zero;
1522 B.buildMergeLikeInstr(DstReg, {Extract, Extend});
1526 auto UpperWidth =
B.buildConstant(
S32, WidthImm - 32);
1529 ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
1530 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
1531 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
1533 MI.eraseFromParent();
1539 auto ExtShift =
B.buildSub(
S32,
B.buildConstant(
S32, 64), WidthReg);
1540 auto SignBit =
B.buildShl(
S64, ShiftOffset, ExtShift);
1542 B.buildAShr(
S64, SignBit, ExtShift);
1544 B.buildLShr(
S64, SignBit, ExtShift);
1545 MI.eraseFromParent();
1551 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1554 auto OffsetMask =
B.buildConstant(
S32, maskTrailingOnes<unsigned>(6));
1555 auto ClampOffset =
B.buildAnd(
S32, OffsetReg, OffsetMask);
1558 auto ShiftWidth =
B.buildShl(
S32, WidthReg,
B.buildConstant(
S32, 16));
1563 auto MergedInputs =
B.buildOr(
S32, ClampOffset, ShiftWidth);
1567 unsigned Opc = Ty ==
S32 ? (
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1568 (
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1570 auto MIB =
B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1574 MI.eraseFromParent();
1592 if (
MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
1595 bool IsUnsigned =
MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
1599 bool DstOnValu =
MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
1600 bool Accumulate =
true;
1609 Register DstLo =
B.buildMul(
S32, Src0, Src1).getReg(0);
1610 bool MulHiInVgpr =
false;
1612 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
1615 DstHi = IsUnsigned ?
B.buildUMulH(
S32, Src0, Src1).getReg(0)
1616 :
B.buildSMulH(
S32, Src0, Src1).getReg(0);
1617 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
1622 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
1623 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
1625 DstHi = IsUnsigned ?
B.buildUMulH(
S32, VSrc0, VSrc1).getReg(0)
1626 :
B.buildSMulH(
S32, VSrc0, VSrc1).getReg(0);
1627 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1643 LLT CarryType = DstOnValu ?
S1 :
S32;
1645 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
1647 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
1652 Zero =
B.buildConstant(
S32, 0).getReg(0);
1653 MRI.setRegBank(Zero,
1654 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
1658 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
1659 : AMDGPU::SGPRRegBank);
1661 if (DstOnValu && !MulHiInVgpr) {
1662 Carry =
B.buildTrunc(
S1, Carry).getReg(0);
1663 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
1669 DstLo =
B.buildCopy(
S32, DstLo).getReg(0);
1670 DstHi =
B.buildCopy(
S32, DstHi).getReg(0);
1671 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
1672 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1675 auto Unmerge =
B.buildUnmerge(
S32, Src2);
1676 Register Src2Lo = Unmerge.getReg(0);
1677 Register Src2Hi = Unmerge.getReg(1);
1678 MRI.setRegBank(Src2Lo, DstBank);
1679 MRI.setRegBank(Src2Hi, DstBank);
1683 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
1685 Carry =
B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
1686 MRI.setRegBank(Carry, CarryBank);
1689 auto AddLo =
B.buildUAddo(
S32, CarryType, DstLo, Src2Lo);
1690 DstLo = AddLo.getReg(0);
1691 Register CarryLo = AddLo.getReg(1);
1692 MRI.setRegBank(DstLo, DstBank);
1693 MRI.setRegBank(CarryLo, CarryBank);
1695 auto AddHi =
B.buildUAdde(
S32, CarryType, DstHi, Src2Hi, CarryLo);
1696 DstHi = AddHi.getReg(0);
1697 MRI.setRegBank(DstHi, DstBank);
1699 Register CarryHi = AddHi.getReg(1);
1700 MRI.setRegBank(CarryHi, CarryBank);
1705 Carry =
B.buildXor(CarryType, Carry, CarryHi).getReg(0);
1706 MRI.setRegBank(Carry, CarryBank);
1710 Carry =
B.buildConstant(CarryType, 0).getReg(0);
1711 MRI.setRegBank(Carry, CarryBank);
1715 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
1718 B.buildCopy(Dst1, Carry);
1720 B.buildTrunc(Dst1, Carry);
1723 MI.eraseFromParent();
1730 case TargetOpcode::G_ASHR:
1731 case TargetOpcode::G_SMIN:
1732 case TargetOpcode::G_SMAX:
1733 return TargetOpcode::G_SEXT;
1734 case TargetOpcode::G_LSHR:
1735 case TargetOpcode::G_UMIN:
1736 case TargetOpcode::G_UMAX:
1737 return TargetOpcode::G_ZEXT;
1739 return TargetOpcode::G_ANYEXT;
1745static std::pair<Register, Register>
1748 auto Bitcast =
B.buildBitcast(
S32, Src);
1750 if (ExtOpcode == TargetOpcode::G_SEXT) {
1751 auto ExtLo =
B.buildSExtInReg(
S32, Bitcast, 16);
1752 auto ShiftHi =
B.buildAShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1753 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1756 auto ShiftHi =
B.buildLShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1757 if (ExtOpcode == TargetOpcode::G_ZEXT) {
1758 auto ExtLo =
B.buildAnd(
S32, Bitcast,
B.buildConstant(
S32, 0xffff));
1759 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1762 assert(ExtOpcode == TargetOpcode::G_ANYEXT);
1763 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
1771 if (!SrcReg.
empty()) {
1788 LLT StoreVT =
MRI.getType(Reg);
1792 auto Unmerge =
B.buildUnmerge(
S16, Reg);
1796 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
1806static std::pair<Register, unsigned>
1810 return std::pair(
Register(), Const);
1814 return std::pair(
Base, Const);
1817 return std::pair(Reg, 0);
1820std::pair<Register, unsigned>
1833 if (ImmOffset != 0) {
1842 unsigned Overflow = ImmOffset & ~MaxImm;
1843 ImmOffset -= Overflow;
1844 if ((int32_t)Overflow < 0) {
1845 Overflow += ImmOffset;
1850 if (Overflow != 0) {
1852 BaseReg =
B.buildConstant(
S32, Overflow).getReg(0);
1854 auto OverflowVal =
B.buildConstant(
S32, Overflow);
1855 BaseReg =
B.buildAdd(
S32, BaseReg, OverflowVal).getReg(0);
1861 BaseReg =
B.buildConstant(
S32, 0).getReg(0);
1863 return {BaseReg, C1};
1869 LLT SrcTy =
MRI.getType(SrcReg);
1872 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1879 Register TmpReg0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1880 Register TmpReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1882 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1884 .addUse(SrcReg, 0, AMDGPU::sub0);
1885 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1887 .addUse(SrcReg, 0, AMDGPU::sub1);
1888 B.buildInstr(AMDGPU::REG_SEQUENCE)
1891 .addImm(AMDGPU::sub0)
1893 .addImm(AMDGPU::sub1);
1904 unsigned ConstOffset) {
1910 auto MaterializedOffset =
B.buildConstant(
S32, ConstOffset);
1912 auto Add =
B.buildAdd(
S32, WaterfallIdx, MaterializedOffset);
1913 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
1914 MRI.setRegBank(
Add.getReg(0), AMDGPU::SGPRRegBank);
1926 bool IsBooleanSrc =
false) {
1927 if (ExtOpc == AMDGPU::G_ZEXT) {
1928 B.buildConstant(Hi32Reg, 0);
1929 }
else if (ExtOpc == AMDGPU::G_SEXT) {
1933 B.buildCopy(Hi32Reg, Lo32Reg);
1937 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
1938 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
1941 assert(ExtOpc == AMDGPU::G_ANYEXT &&
"not an integer extension");
1942 B.buildUndef(Hi32Reg);
1946bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
1948 const OperandsMapper &OpdMapper)
const {
1955 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
1957 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
1959 LLT VecTy =
MRI.getType(VecReg);
1970 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
1972 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1975 (DstBank == AMDGPU::SGPRRegBank &&
1976 SrcBank == AMDGPU::SGPRRegBank &&
1977 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
1978 : AMDGPU::VCCRegBank;
1981 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
1982 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
1983 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
1988 unsigned NumLanes = DstRegs.size();
1992 EltTy =
MRI.getType(DstRegs[0]);
1994 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
1996 for (
unsigned L = 0;
L < NumLanes; ++
L)
1997 Res[L] = UnmergeToEltTy.getReg(L);
1999 for (
unsigned I = 1;
I < NumElem; ++
I) {
2000 auto IC =
B.buildConstant(
S32,
I);
2001 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2003 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2005 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2006 auto S =
B.buildSelect(EltTy, Cmp,
2007 UnmergeToEltTy.getReg(
I * NumLanes + L), Res[L]);
2009 for (
unsigned N : { 0, 2, 3 })
2010 MRI.setRegBank(S->getOperand(
N).getReg(), DstBank);
2012 Res[
L] = S->getOperand(0).getReg();
2016 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2017 Register DstReg = (NumLanes == 1) ?
MI.getOperand(0).getReg() : DstRegs[
L];
2018 B.buildCopy(DstReg, Res[L]);
2019 MRI.setRegBank(DstReg, DstBank);
2022 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2023 MI.eraseFromParent();
2034 if (CurrBank && *CurrBank != Bank) {
2035 Register Copy =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
2036 MRI.setRegBank(Copy, Bank);
2040 MRI.setRegBank(Reg, Bank);
2044bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
2046 const OperandsMapper &OpdMapper)
const {
2053 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2055 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
2057 LLT VecTy =
MRI.getType(VecReg);
2068 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2070 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
2072 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
2075 (DstBank == AMDGPU::SGPRRegBank &&
2076 SrcBank == AMDGPU::SGPRRegBank &&
2077 InsBank == AMDGPU::SGPRRegBank &&
2078 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
2079 : AMDGPU::VCCRegBank;
2082 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
2083 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
2084 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
2089 unsigned NumLanes = InsRegs.size();
2092 InsRegs.push_back(
MI.getOperand(2).getReg());
2094 EltTy =
MRI.getType(InsRegs[0]);
2097 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
2100 for (
unsigned I = 0;
I < NumElem; ++
I) {
2101 auto IC =
B.buildConstant(
S32,
I);
2102 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2104 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2106 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2108 Register Op1 = UnmergeToEltTy.getReg(
I * NumLanes + L);
2119 if (MergeTy ==
MRI.getType(
MI.getOperand(0).getReg())) {
2120 B.buildBuildVector(
MI.getOperand(0), Ops);
2122 auto Vec =
B.buildBuildVector(MergeTy, Ops);
2123 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
2124 B.buildBitcast(
MI.getOperand(0).getReg(), Vec);
2127 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2128 MI.eraseFromParent();
2141 if (DefRegs.
empty()) {
2149 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2160 if (Src0Regs.
empty())
2165 if (Src1Regs.
empty())
2188 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2190 Register Hi =
B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
2191 Register MulLoHi =
B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);
2193 Register MulHiLo =
B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
2194 B.buildAdd(DefRegs[1],
Add, MulHiLo);
2195 B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);
2197 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2198 MI.eraseFromParent();
2204 B.setInstrAndDebugLoc(
MI);
2205 unsigned Opc =
MI.getOpcode();
2208 case AMDGPU::G_CONSTANT:
2209 case AMDGPU::G_IMPLICIT_DEF: {
2211 LLT DstTy =
MRI.getType(DstReg);
2217 if (DstBank == &AMDGPU::VCCRegBank)
2220 if (DefRegs.
empty())
2223 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2226 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2228 MI.getOperand(0).setReg(NewDstReg);
2229 if (Opc != AMDGPU::G_IMPLICIT_DEF) {
2230 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2231 MI.getOperand(1).setCImm(
2235 MRI.setRegBank(NewDstReg, *DstBank);
2236 B.buildTrunc(DefRegs[0], NewDstReg);
2239 case AMDGPU::G_PHI: {
2241 LLT DstTy =
MRI.getType(DstReg);
2248 if (DstBank == &AMDGPU::VCCRegBank) {
2255 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
2259 if (SrcBank != &AMDGPU::VCCRegBank) {
2264 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2265 MI.getOperand(
I).setReg(Copy.getReg(0));
2276 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2277 B.setInsertPt(
B.getMBB(),
MI);
2285 case AMDGPU::G_FCMP:
2289 case AMDGPU::G_ICMP:
2290 case AMDGPU::G_UADDO:
2291 case AMDGPU::G_USUBO:
2292 case AMDGPU::G_UADDE:
2293 case AMDGPU::G_SADDE:
2294 case AMDGPU::G_USUBE:
2295 case AMDGPU::G_SSUBE: {
2296 unsigned BoolDstOp =
2297 (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
2298 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2302 if (DstBank != &AMDGPU::SGPRRegBank)
2305 const bool HasCarryIn =
MI.getNumOperands() == 5;
2311 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2312 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2316 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2317 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2318 MI.getOperand(4).setReg(NewSrcReg);
2322 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2327 if (DefRegs.
empty())
2329 B.buildTrunc(DefRegs[0], NewDstReg);
2332 case AMDGPU::G_SELECT: {
2334 LLT DstTy =
MRI.getType(DstReg);
2337 if (CondRegs.
empty())
2344 if (CondBank == &AMDGPU::SGPRRegBank) {
2347 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2349 MI.getOperand(1).setReg(NewCondReg);
2350 B.buildZExt(NewCondReg, CondRegs[0]);
2363 if (DefRegs.
empty()) {
2368 if (Src1Regs.
empty())
2374 if (Src2Regs.
empty())
2381 auto Flags =
MI.getFlags();
2382 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0], Flags);
2383 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1], Flags);
2385 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2386 MI.eraseFromParent();
2389 case AMDGPU::G_BRCOND: {
2390 Register CondReg =
MI.getOperand(0).getReg();
2395 if (CondBank == &AMDGPU::SGPRRegBank) {
2398 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2400 MI.getOperand(0).setReg(NewCondReg);
2401 B.buildZExt(NewCondReg, CondReg);
2409 case AMDGPU::G_XOR: {
2413 LLT DstTy =
MRI.getType(DstReg);
2418 if (DstBank == &AMDGPU::VCCRegBank)
2422 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2440 if (DefRegs.
empty()) {
2447 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2453 if (Src0Regs.
empty())
2458 if (Src1Regs.
empty())
2465 auto Flags =
MI.getFlags();
2466 B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]}, Flags);
2467 B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]}, Flags);
2469 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2470 MI.eraseFromParent();
2473 case AMDGPU::G_ABS: {
2479 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2481 ApplyRegBankMapping Apply(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2494 case AMDGPU::G_LSHR:
2495 case AMDGPU::G_ASHR:
2496 case AMDGPU::G_SMIN:
2497 case AMDGPU::G_SMAX:
2498 case AMDGPU::G_UMIN:
2499 case AMDGPU::G_UMAX: {
2501 LLT DstTy =
MRI.getType(DstReg);
2518 if (DstBank == &AMDGPU::VGPRRegBank)
2524 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2526 if (DstTy.
isVector() && Opc == AMDGPU::G_ABS) {
2529 std::tie(WideSrcLo, WideSrcHi) =
2531 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcLo});
2532 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcHi});
2533 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2534 MI.eraseFromParent();
2543 std::tie(WideSrc0Lo, WideSrc0Hi)
2545 std::tie(WideSrc1Lo, WideSrc1Hi)
2547 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2548 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2549 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2550 MI.eraseFromParent();
2558 if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
2559 Opc == AMDGPU::G_ASHR) {
2560 B.setInsertPt(*
MBB,
MI.getIterator());
2568 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
2569 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
2583 Register SrcReg0 =
MI.getOperand(1).getReg();
2584 Register SrcReg1 =
MI.getOperand(2).getReg();
2587 assert(
MRI.getType(DstReg) ==
S64 &&
"This is a special case for s_mul_u64 "
2588 "that handles only 64-bit operands.");
2594 if (DstBank == &AMDGPU::SGPRRegBank) {
2595 MI.setDesc(
TII->get(AMDGPU::S_MUL_U64));
2596 MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
2597 MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
2598 MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
2604 assert(
MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
2605 "The destination operand should be in vector registers.");
2610 Register Op0L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2611 MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
2613 B.buildTrunc(Op0L, SrcReg0);
2616 Register Op1L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2617 MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
2619 B.buildTrunc(Op1L, SrcReg1);
2621 unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
2622 ? AMDGPU::G_AMDGPU_MAD_U64_U32
2623 : AMDGPU::G_AMDGPU_MAD_I64_I32;
2627 MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
2628 Register CarryOut =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2629 MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
2630 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
2631 MI.eraseFromParent();
2634 case AMDGPU::G_SEXT_INREG: {
2636 if (SrcRegs.
empty())
2640 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2647 int Amt =
MI.getOperand(2).getImm();
2653 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2655 auto Freeze =
B.buildFreeze(
S32, SrcRegs[0]);
2657 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2660 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(
S32, 31));
2664 B.buildCopy(DstRegs[0], SrcRegs[0]);
2665 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2669 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2670 MI.eraseFromParent();
2673 case AMDGPU::G_CTPOP:
2674 case AMDGPU::G_BITREVERSE: {
2677 if (DstBank == &AMDGPU::SGPRRegBank)
2682 LLT Ty =
MRI.getType(SrcReg);
2686 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2695 case AMDGPU::G_AMDGPU_FFBH_U32:
2696 case AMDGPU::G_AMDGPU_FFBL_B32:
2697 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2698 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2701 if (DstBank == &AMDGPU::SGPRRegBank)
2706 LLT Ty =
MRI.getType(SrcReg);
2716 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2718 unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2719 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2720 : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2721 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2723 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2724 auto X =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx]});
2725 auto Y =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx ^ 1]});
2727 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2729 : AMDGPU::G_UADDSAT;
2730 Y =
B.buildInstr(AddOpc, {
S32}, {
Y,
B.buildConstant(
S32, 32)});
2732 B.buildUMin(DstReg,
X,
Y);
2733 MI.eraseFromParent();
2736 case AMDGPU::G_SEXT:
2737 case AMDGPU::G_ZEXT:
2738 case AMDGPU::G_ANYEXT: {
2740 LLT SrcTy =
MRI.getType(SrcReg);
2741 const bool Signed = Opc == AMDGPU::G_SEXT;
2749 LLT DstTy =
MRI.getType(DstReg);
2751 SrcBank != &AMDGPU::SGPRRegBank &&
2752 SrcBank != &AMDGPU::VCCRegBank &&
2762 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2763 }
else if (Opc == AMDGPU::G_ZEXT) {
2764 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2766 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2770 MRI.setRegBank(DstReg, *SrcBank);
2771 MI.eraseFromParent();
2781 if (SrcBank == &AMDGPU::VCCRegBank) {
2788 const bool UseSel64 = DstSize > 32 &&
2789 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2793 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2794 auto False =
B.buildConstant(SelType, 0);
2796 MRI.setRegBank(True.getReg(0), *DstBank);
2797 MRI.setRegBank(False.getReg(0), *DstBank);
2798 MRI.setRegBank(DstReg, *DstBank);
2801 B.buildSelect(DefRegs[0], SrcReg, True, False);
2803 }
else if (DstSize < 32) {
2804 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2805 MRI.setRegBank(Sel.getReg(0), *DstBank);
2806 B.buildTrunc(DstReg, Sel);
2808 B.buildSelect(DstReg, SrcReg, True, False);
2811 MI.eraseFromParent();
2817 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2826 LLT DstTy =
MRI.getType(DstReg);
2827 LLT SrcTy =
MRI.getType(SrcReg);
2829 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2841 unsigned ConstOffset;
2842 std::tie(BaseIdxReg, ConstOffset) =
2849 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2854 if (ShouldMoveIndexIntoLoop)
2855 MI.getOperand(2).setReg(BaseIdxReg);
2861 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2862 SrcBank == &AMDGPU::SGPRRegBank;
2863 if (DstRegs.
empty()) {
2868 if (NeedCopyToVGPR) {
2870 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2871 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2872 MI.getOperand(0).setReg(TmpReg);
2873 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2880 if (ShouldMoveIndexIntoLoop)
2890 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2891 auto One =
B.buildConstant(
S32, 1);
2902 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2903 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2905 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2906 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2908 MRI.setRegBank(DstReg, *DstBank);
2909 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2910 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2911 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2912 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2916 MI.eraseFromParent();
2922 B.setInstr(*Span.
begin());
2923 MI.eraseFromParent();
2927 if (NeedCopyToVGPR) {
2931 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2932 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2934 Extract0->getOperand(0).setReg(TmpReg0);
2935 Extract1->getOperand(0).setReg(TmpReg1);
2943 if (ShouldMoveIndexIntoLoop)
2948 case AMDGPU::G_INSERT_VECTOR_ELT: {
2952 LLT VecTy =
MRI.getType(DstReg);
2958 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2960 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2968 LLT InsTy =
MRI.getType(InsReg);
2972 unsigned ConstOffset;
2973 std::tie(BaseIdxReg, ConstOffset) =
2980 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2985 if (ShouldMoveIndexIntoLoop)
2986 MI.getOperand(3).setReg(BaseIdxReg);
2989 if (InsRegs.
empty()) {
2993 if (ShouldMoveIndexIntoLoop) {
3005 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
3006 auto One =
B.buildConstant(
S32, 1);
3015 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
3016 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
3018 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
3019 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3028 MRI.setRegBank(InsReg, *InsSrcBank);
3029 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
3030 MRI.setRegBank(InsLo.getReg(0), *DstBank);
3031 MRI.setRegBank(InsHi.getReg(0), *DstBank);
3032 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3033 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3034 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3039 B.setInsertPt(
B.getMBB(),
MI);
3040 B.buildBitcast(DstReg, InsHi);
3041 MI.eraseFromParent();
3045 B.setInstr(*Span.
begin());
3046 MI.eraseFromParent();
3057 B.buildBitcast(DstReg, InsHi);
3060 if (ShouldMoveIndexIntoLoop)
3065 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3066 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3067 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3068 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3069 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3070 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3071 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3072 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3073 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3074 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
3075 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3076 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3077 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3078 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3079 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3080 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3081 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3082 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3083 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3084 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3085 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3086 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3091 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3092 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3093 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3094 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3095 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3096 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3097 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3098 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3099 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3100 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3101 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3102 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
3103 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3104 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3105 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3110 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3115 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3116 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3117 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3118 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3119 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3123 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
3127 case AMDGPU::G_INTRINSIC:
3128 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3130 case Intrinsic::amdgcn_readlane: {
3141 case Intrinsic::amdgcn_writelane: {
3151 case Intrinsic::amdgcn_interp_p1:
3152 case Intrinsic::amdgcn_interp_p2:
3153 case Intrinsic::amdgcn_interp_mov:
3154 case Intrinsic::amdgcn_interp_p1_f16:
3155 case Intrinsic::amdgcn_interp_p2_f16:
3156 case Intrinsic::amdgcn_lds_param_load: {
3164 case Intrinsic::amdgcn_interp_inreg_p10:
3165 case Intrinsic::amdgcn_interp_inreg_p2:
3166 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3167 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3168 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3169 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3170 case Intrinsic::amdgcn_permlane16_swap:
3171 case Intrinsic::amdgcn_permlane32_swap:
3174 case Intrinsic::amdgcn_permlane16:
3175 case Intrinsic::amdgcn_permlanex16: {
3183 case Intrinsic::amdgcn_sbfe:
3186 case Intrinsic::amdgcn_ubfe:
3189 case Intrinsic::amdgcn_inverse_ballot:
3190 case Intrinsic::amdgcn_s_bitreplicate:
3191 case Intrinsic::amdgcn_s_quadmask:
3192 case Intrinsic::amdgcn_s_wqm:
3196 case Intrinsic::amdgcn_ballot:
3202 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3203 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3204 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
3205 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3206 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3216 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
3217 unsigned N =
MI.getNumExplicitOperands() - 2;
3222 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3223 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3224 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
3226 case Intrinsic::amdgcn_ds_ordered_add:
3227 case Intrinsic::amdgcn_ds_ordered_swap: {
3234 case Intrinsic::amdgcn_ds_gws_init:
3235 case Intrinsic::amdgcn_ds_gws_barrier:
3236 case Intrinsic::amdgcn_ds_gws_sema_br: {
3242 case Intrinsic::amdgcn_ds_gws_sema_v:
3243 case Intrinsic::amdgcn_ds_gws_sema_p:
3244 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3249 case Intrinsic::amdgcn_ds_append:
3250 case Intrinsic::amdgcn_ds_consume: {
3254 case Intrinsic::amdgcn_s_sendmsg:
3255 case Intrinsic::amdgcn_s_sendmsghalt: {
3260 case Intrinsic::amdgcn_s_setreg: {
3264 case Intrinsic::amdgcn_s_ttracedata:
3267 case Intrinsic::amdgcn_raw_buffer_load_lds:
3268 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3275 case Intrinsic::amdgcn_struct_buffer_load_lds:
3276 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3283 case Intrinsic::amdgcn_global_load_lds: {
3288 case Intrinsic::amdgcn_lds_direct_load: {
3294 case Intrinsic::amdgcn_exp_row:
3298 case Intrinsic::amdgcn_s_sleep_var:
3302 case Intrinsic::amdgcn_s_barrier_join:
3303 case Intrinsic::amdgcn_s_wakeup_barrier:
3306 case Intrinsic::amdgcn_s_barrier_init:
3307 case Intrinsic::amdgcn_s_barrier_signal_var:
3311 case Intrinsic::amdgcn_s_get_barrier_state:
3312 case Intrinsic::amdgcn_s_get_named_barrier_state: {
3316 case Intrinsic::amdgcn_s_prefetch_data: {
3318 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3323 MI.eraseFromParent();
3332 if (RSrcIntrin->IsImage) {
3343 case AMDGPU::G_SI_CALL: {
3354 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3355 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3361 unsigned NonCopyInstrsLen = 0;
3367 while (Start->getOpcode() != FrameSetupOpcode) {
3369 bool IsCopy =
false;
3370 if (Start->getOpcode() == AMDGPU::COPY) {
3371 auto &Dst = Start->getOperand(0);
3374 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3379 auto &Src = Start->getOperand(1);
3382 IsCopy =
Info->getScratchRSrcReg() == Reg;
3390 NonCopyInstrsLen = NonCopyInstrs.
size();
3395 NonCopyInstrs.
resize(NonCopyInstrsLen);
3397 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3403 NonCopyInstrs.
clear();
3404 NonCopyInstrsLen = 0;
3407 while (
End->getOpcode() != FrameDestroyOpcode) {
3409 bool IsCopy =
false;
3410 if (
End->getOpcode() == AMDGPU::COPY) {
3411 auto &Src =
End->getOperand(1);
3414 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3420 NonCopyInstrsLen = NonCopyInstrs.
size();
3425 NonCopyInstrs.
resize(NonCopyInstrsLen);
3429 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3434 B.setInsertPt(
B.getMBB(), Start);
3438 case AMDGPU::G_LOAD:
3439 case AMDGPU::G_ZEXTLOAD:
3440 case AMDGPU::G_SEXTLOAD: {
3445 case AMDGPU::G_DYN_STACKALLOC:
3448 case AMDGPU::G_STACKRESTORE: {
3453 case AMDGPU::G_SBFX:
3456 case AMDGPU::G_UBFX:
3459 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3460 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3463 case AMDGPU::G_PREFETCH: {
3465 MI.eraseFromParent();
3470 if (PtrBank == AMDGPU::VGPRRegBankID) {
3471 MI.eraseFromParent();
3474 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3477 MI.eraseFromParent();
3495 if (RB0 == AMDGPU::InvalidRegBankID)
3497 if (RB1 == AMDGPU::InvalidRegBankID)
3500 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3501 return AMDGPU::SGPRRegBankID;
3503 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3504 return AMDGPU::AGPRRegBankID;
3506 return AMDGPU::VGPRRegBankID;
3510 if (RB0 == AMDGPU::InvalidRegBankID)
3512 if (RB1 == AMDGPU::InvalidRegBankID)
3518 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3519 return AMDGPU::VCCRegBankID;
3527 unsigned RegBank = AMDGPU::InvalidRegBankID;
3535 if (RegBank == AMDGPU::VGPRRegBankID)
3551 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3564 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3570 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3573 MI.getNumOperands());
3586 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3592 unsigned BankID =
Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3593 OpdsMapping[i] = AMDGPU::getValueMapping(BankID,
Size);
3597 MI.getNumOperands());
3606 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
3612 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3616 MI.getNumOperands());
3622 int RsrcIdx)
const {
3625 RsrcIdx +=
MI.getNumExplicitDefs() + 1;
3627 const int NumOps =
MI.getNumOperands();
3632 for (
int I = 0;
I != NumOps; ++
I) {
3633 if (!
MI.getOperand(
I).isReg())
3647 const bool MustBeSGPR =
I == RsrcIdx ||
I == RsrcIdx + 1;
3652 OpdsMapping[
I] = AMDGPU::getValueMapping(NewBank,
Size);
3655 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3666 LLT PtrTy =
MRI.getType(PtrReg);
3670 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3675 return AMDGPU::getValueMapping(PtrBank->
getID(),
Size);
3686 LLT PtrTy =
MRI.getType(PtrReg);
3698 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3699 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3701 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3706 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3708 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3711 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3712 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3715 OpdsMapping[0] = ValMapping;
3716 OpdsMapping[1] = PtrMapping;
3741 return AMDGPU::getValueMapping(Bank,
Size);
3749 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3757 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID,
Size);
3774 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3782 assert(SrcBank &&
"src bank should have been assigned already");
3789 DstBank = &AMDGPU::VCCRegBank;
3791 DstBank = &AMDGPU::VCCRegBank;
3797 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3802 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3804 OpdsMapping[0] = &ValMap;
3805 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3806 OpdsMapping[1] = &ValMap;
3813 if (
MI.isRegSequence()) {
3816 unsigned BankID = AMDGPU::SGPRRegBankID;
3818 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
3822 if (OpBank != AMDGPU::SGPRRegBankID) {
3823 BankID = AMDGPU::VGPRRegBankID;
3839 if (
auto *
PHI = dyn_cast<GPhi>(&
MI)) {
3840 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3845 ResultBank = DstBank->
getID();
3847 for (
unsigned I = 0;
I <
PHI->getNumIncomingValues(); ++
I) {
3852 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3853 ResultBank = AMDGPU::VGPRRegBankID;
3858 unsigned OpBank = Bank->
getID();
3862 assert(ResultBank != AMDGPU::InvalidRegBankID);
3864 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3879 switch (
MI.getOpcode()) {
3886 case AMDGPU::G_MUL: {
3887 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3892 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3893 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3894 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3896 TargetBankID = DstBank->
getID();
3897 if (DstBank == &AMDGPU::VCCRegBank) {
3898 TargetBankID = AMDGPU::VCCRegBankID;
3899 BankLHS = AMDGPU::VCCRegBankID;
3900 BankRHS = AMDGPU::VCCRegBankID;
3903 AMDGPU::SGPRRegBankID);
3905 AMDGPU::SGPRRegBankID);
3909 AMDGPU::VCCRegBankID);
3911 AMDGPU::VCCRegBankID);
3914 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3915 TargetBankID = AMDGPU::VGPRRegBankID;
3916 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3917 TargetBankID = AMDGPU::VCCRegBankID;
3918 BankLHS = AMDGPU::VCCRegBankID;
3919 BankRHS = AMDGPU::VCCRegBankID;
3920 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3921 TargetBankID = AMDGPU::SGPRRegBankID;
3925 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3926 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3927 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
3934 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
3935 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
3937 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
3939 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
3942 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
3950 case AMDGPU::G_PTR_ADD:
3951 case AMDGPU::G_PTRMASK:
3955 case AMDGPU::G_LSHR:
3956 case AMDGPU::G_ASHR:
3957 case AMDGPU::G_UADDO:
3958 case AMDGPU::G_USUBO:
3959 case AMDGPU::G_UADDE:
3960 case AMDGPU::G_SADDE:
3961 case AMDGPU::G_USUBE:
3962 case AMDGPU::G_SSUBE:
3963 case AMDGPU::G_SMIN:
3964 case AMDGPU::G_SMAX:
3965 case AMDGPU::G_UMIN:
3966 case AMDGPU::G_UMAX:
3968 case AMDGPU::G_SHUFFLE_VECTOR:
3969 case AMDGPU::G_SBFX:
3970 case AMDGPU::G_UBFX:
3971 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
3972 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
3976 case AMDGPU::G_FADD:
3977 case AMDGPU::G_FSUB:
3978 case AMDGPU::G_FMUL:
3980 case AMDGPU::G_FFLOOR:
3981 case AMDGPU::G_FCEIL:
3982 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
3983 case AMDGPU::G_FMINNUM:
3984 case AMDGPU::G_FMAXNUM:
3985 case AMDGPU::G_FMINIMUM:
3986 case AMDGPU::G_FMAXIMUM:
3987 case AMDGPU::G_INTRINSIC_TRUNC:
3988 case AMDGPU::G_STRICT_FADD:
3989 case AMDGPU::G_STRICT_FSUB:
3990 case AMDGPU::G_STRICT_FMUL:
3991 case AMDGPU::G_STRICT_FMA: {
3992 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
3999 case AMDGPU::G_FPTOSI:
4000 case AMDGPU::G_FPTOUI:
4001 case AMDGPU::G_SITOFP:
4002 case AMDGPU::G_UITOFP: {
4003 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4004 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4010 case AMDGPU::G_FPTRUNC:
4011 case AMDGPU::G_FPEXT: {
4012 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4013 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4019 case AMDGPU::G_FSQRT:
4020 case AMDGPU::G_FEXP2:
4021 case AMDGPU::G_FLOG2: {
4022 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4028 case AMDGPU::G_SADDSAT:
4029 case AMDGPU::G_SSUBSAT:
4030 case AMDGPU::G_UADDSAT:
4031 case AMDGPU::G_USUBSAT:
4032 case AMDGPU::G_FMAD:
4033 case AMDGPU::G_FLDEXP:
4034 case AMDGPU::G_FMINNUM_IEEE:
4035 case AMDGPU::G_FMAXNUM_IEEE:
4036 case AMDGPU::G_FCANONICALIZE:
4037 case AMDGPU::G_STRICT_FLDEXP:
4038 case AMDGPU::G_BSWAP:
4039 case AMDGPU::G_FSHR:
4040 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
4041 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
4042 case AMDGPU::G_AMDGPU_RCP_IFLAG:
4043 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
4044 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
4045 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
4046 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
4047 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
4048 case AMDGPU::G_AMDGPU_SMED3:
4049 case AMDGPU::G_AMDGPU_FMED3:
4051 case AMDGPU::G_UMULH:
4052 case AMDGPU::G_SMULH: {
4057 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4058 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4067 bool AllSalu =
true;
4068 bool MulSalu =
true;
4069 for (
unsigned i = 0; i < 5; ++i) {
4072 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4074 if (i == 2 || i == 3) {
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
  case AMDGPU::G_IMPLICIT_DEF: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_FCONSTANT:
  case AMDGPU::G_CONSTANT:
  case AMDGPU::G_GLOBAL_VALUE:
  case AMDGPU::G_FRAME_INDEX:
  case AMDGPU::G_BLOCK_ADDR:
  case AMDGPU::G_READSTEADYCOUNTER:
  case AMDGPU::G_READCYCLECOUNTER: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_DYN_STACKALLOC: {
    // The result (the new stack pointer) is always uniform.
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
    break;
  }
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
    // This case is weird because we expect a physical register in the source,
    // but need to set a bank anyway.
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    break;
  }
  case AMDGPU::G_INSERT: {
    unsigned BankID = getMappingType(MRI, MI);
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
    OpdsMapping[3] = nullptr;
    break;
  }
  case AMDGPU::G_EXTRACT: {
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = nullptr;
    break;
  }
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
    unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);

    OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
    break;
  }
  case AMDGPU::G_MERGE_VALUES:
  case AMDGPU::G_CONCAT_VECTORS: {
    unsigned Bank = getMappingType(MRI, MI);
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    // Op 1 and Dst should use the same register bank.
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_BITREVERSE:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_INTTOPTR:
  case AMDGPU::G_PTRTOINT:
  case AMDGPU::G_FABS:
  case AMDGPU::G_FNEG: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
    break;
  }
  case AMDGPU::G_AMDGPU_FFBH_U32:
  case AMDGPU::G_AMDGPU_FFBL_B32:
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
  case AMDGPU::G_CTTZ_ZERO_UNDEF: {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
    OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
    break;
  }
  case AMDGPU::G_CTPOP: {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
    break;
  }
  case AMDGPU::G_TRUNC: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    unsigned Bank = getRegBankID(Src, MRI);
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_ZEXT:
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_SEXT_INREG: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
    unsigned DstBank;
    const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
    switch (SrcBank->getID()) {
    case AMDGPU::SGPRRegBankID:
      DstBank = AMDGPU::SGPRRegBankID;
      break;
    default:
      DstBank = AMDGPU::VGPRRegBankID;
      break;
    }
    OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
                                                       SrcSize);
    break;
  }
  case AMDGPU::G_IS_FPCLASS: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
    break;
  }
  case AMDGPU::G_STORE: {
    assert(MI.getOperand(0).isReg());
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

    // FIXME: We need to specify a different reg bank once scalar stores are
    // supported.
    const ValueMapping *ValMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[0] = ValMapping;
    OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
    break;
  }
  case AMDGPU::G_ICMP:
  case AMDGPU::G_FCMP: {
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

    // See if the result register has already been constrained to vcc, which
    // may happen due to control flow intrinsic lowering.
    unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI,
                                    AMDGPU::SGPRRegBankID);
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI);

    auto canUseSCCICMP = [&]() {
      auto Pred =
          static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
      return Size == 32 ||
             (Size == 64 &&
              (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
              Subtarget.hasScalarCompareEq64());
    };
    auto canUseSCCFCMP = [&]() {
      return Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16);
    };

    bool isICMP = MI.getOpcode() == AMDGPU::G_ICMP;
    bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
                     Op2Bank == AMDGPU::SGPRRegBankID &&
                     Op3Bank == AMDGPU::SGPRRegBankID &&
                     (isICMP ? canUseSCCICMP() : canUseSCCFCMP());

    DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
    unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    const unsigned ResultSize = 1;

    OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
    OpdsMapping[1] = nullptr; // Predicate operand.
    OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
    break;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
    unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI);
    unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);

    OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);

    // The index can be either if the source vector is VGPR.
    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
    break;
  }
  case AMDGPU::G_INSERT_VECTOR_ELT: {
    unsigned OutputBankID = isSALUMapping(MI) ?
      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
    unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
    unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI);

    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);

    // This is a weird case, because we need to break down the mapping based on
    // the register bank of a different operand.
    if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
      OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
                                                      InsertSize);
    } else {
      assert(InsertSize == 32 || InsertSize == 64);
      OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
    }

    // The index can be either if the source vector is VGPR.
    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
    break;
  }
  case AMDGPU::G_UNMERGE_VALUES: {
    unsigned Bank = getMappingType(MRI, MI);

    // Op 1 and Dst should use the same register bank.
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
    }
    break;
  }
  case AMDGPU::G_AMDGPU_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
  case AMDGPU::G_AMDGPU_BUFFER_STORE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
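    // All buffer accesses share one operand-bank scheme: the data/result is a
    // VGPR, the 128-bit resource descriptor and soffset must be uniform
    // (SGPR), and voffset/vindex are VGPRs. A divergent rsrc or soffset is
    // legalized afterwards with a waterfall loop, e.g. (illustrative MIR):
    //   %val:vgpr(s32) = G_AMDGPU_BUFFER_LOAD %rsrc:sgpr(<4 x s32>), ...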
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
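    // The buffer atomics (including cmpswap) use the same scheme as the
    // loads/stores above; the value operands, including the comparison value,
    // are per-lane and therefore VGPRs.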
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
    // The result is uniform only if both the resource and the offset are.
    unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
    unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
    unsigned ResultBank = regBankUnion(RSrcBank, OffsetBank);

    unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
    break;
  }
  case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
    OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    default:
      return getInvalidInstructionMapping();
  case Intrinsic::amdgcn_div_fmas:
  case Intrinsic::amdgcn_div_fixup:
  case Intrinsic::amdgcn_trig_preop:
  case Intrinsic::amdgcn_sin:
  case Intrinsic::amdgcn_cos:
  case Intrinsic::amdgcn_log_clamp:
  case Intrinsic::amdgcn_rcp_legacy:
  case Intrinsic::amdgcn_rsq_legacy:
  case Intrinsic::amdgcn_rsq_clamp:
  case Intrinsic::amdgcn_fmul_legacy:
  case Intrinsic::amdgcn_fma_legacy:
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp:
  case Intrinsic::amdgcn_fract:
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16:
  case Intrinsic::amdgcn_fmed3:
  case Intrinsic::amdgcn_cubeid:
  case Intrinsic::amdgcn_cubema:
  case Intrinsic::amdgcn_cubesc:
  case Intrinsic::amdgcn_cubetc:
  case Intrinsic::amdgcn_sffbh:
  case Intrinsic::amdgcn_fmad_ftz:
  case Intrinsic::amdgcn_mbcnt_lo:
  case Intrinsic::amdgcn_mbcnt_hi:
  case Intrinsic::amdgcn_mul_u24:
  case Intrinsic::amdgcn_mul_i24:
  case Intrinsic::amdgcn_mulhi_u24:
  case Intrinsic::amdgcn_mulhi_i24:
  case Intrinsic::amdgcn_lerp:
  case Intrinsic::amdgcn_sad_u8:
  case Intrinsic::amdgcn_msad_u8:
  case Intrinsic::amdgcn_sad_hi_u8:
  case Intrinsic::amdgcn_sad_u16:
  case Intrinsic::amdgcn_qsad_pk_u16_u8:
  case Intrinsic::amdgcn_mqsad_pk_u16_u8:
  case Intrinsic::amdgcn_mqsad_u32_u8:
  case Intrinsic::amdgcn_cvt_pk_u8_f32:
  case Intrinsic::amdgcn_alignbyte:
  case Intrinsic::amdgcn_perm:
  case Intrinsic::amdgcn_prng_b32:
  case Intrinsic::amdgcn_fdot2:
  case Intrinsic::amdgcn_sdot2:
  case Intrinsic::amdgcn_udot2:
  case Intrinsic::amdgcn_sdot4:
  case Intrinsic::amdgcn_udot4:
  case Intrinsic::amdgcn_sdot8:
  case Intrinsic::amdgcn_udot8:
  case Intrinsic::amdgcn_fdot2_bf16_bf16:
  case Intrinsic::amdgcn_fdot2_f16_f16:
  case Intrinsic::amdgcn_fdot2_f32_bf16:
  case Intrinsic::amdgcn_fdot2c_f32_bf16:
  case Intrinsic::amdgcn_sudot4:
  case Intrinsic::amdgcn_sudot8:
  case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
  case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
  case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
  case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
  case Intrinsic::amdgcn_cvt_f32_fp8:
  case Intrinsic::amdgcn_cvt_f32_bf8:
  case Intrinsic::amdgcn_cvt_pk_f32_fp8:
  case Intrinsic::amdgcn_cvt_pk_f32_bf8:
  case Intrinsic::amdgcn_cvt_pk_fp8_f32:
  case Intrinsic::amdgcn_cvt_pk_bf8_f32:
  case Intrinsic::amdgcn_cvt_sr_fp8_f32:
  case Intrinsic::amdgcn_cvt_sr_bf8_f32:
  case Intrinsic::amdgcn_cvt_sr_bf16_f32:
  case Intrinsic::amdgcn_cvt_sr_f16_f32:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_f16_fp8:
  case Intrinsic::amdgcn_cvt_scalef32_f16_bf8:
  case Intrinsic::amdgcn_cvt_scalef32_f32_fp8:
  case Intrinsic::amdgcn_cvt_scalef32_f32_bf8:
  case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f32:
  case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f32:
  case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp8:
  case Intrinsic::amdgcn_cvt_scalef32_pk_f32_bf8:
  case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f16:
  case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f16:
  case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp4:
  case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f32:
  case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp4:
  case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp4:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_fp6:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_bf6:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_bf6:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_bf6:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_fp6:
  case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_fp6:
  case Intrinsic::amdgcn_cvt_scalef32_pk_f16_bf8:
  case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_bf8:
  case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp8:
  case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp8:
  case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f16:
  case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
  case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f32:
  case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_bf16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f16:
  case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f32:
  case Intrinsic::amdgcn_ashr_pk_i8_i32:
  case Intrinsic::amdgcn_ashr_pk_u8_i32:
  case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:
  case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32:
  case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
  case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
  case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
  case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
  case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
  case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
  case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
  case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
  case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
  case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
  case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
  case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
  case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
  case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
  case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
  case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
  case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
  case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
  case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
  case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
  case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
  case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
    // All of the above are plain VALU arithmetic.
    return getDefaultMappingVOP(MI);
  case Intrinsic::amdgcn_log:
  case Intrinsic::amdgcn_exp2:
  case Intrinsic::amdgcn_rcp:
  case Intrinsic::amdgcn_rsq:
  case Intrinsic::amdgcn_sqrt: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
        isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  }
  case Intrinsic::amdgcn_sbfe:
  case Intrinsic::amdgcn_ubfe:
    if (isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  case Intrinsic::amdgcn_ds_swizzle:
  case Intrinsic::amdgcn_ds_permute:
  case Intrinsic::amdgcn_ds_bpermute:
  case Intrinsic::amdgcn_update_dpp:
  case Intrinsic::amdgcn_mov_dpp8:
  case Intrinsic::amdgcn_mov_dpp:
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wqm:
  case Intrinsic::amdgcn_wqm:
  case Intrinsic::amdgcn_softwqm:
  case Intrinsic::amdgcn_set_inactive:
  case Intrinsic::amdgcn_set_inactive_chain_arg:
  case Intrinsic::amdgcn_permlane64:
    return getDefaultMappingAllVGPR(MI);
  case Intrinsic::amdgcn_cvt_pkrtz:
    if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  case Intrinsic::amdgcn_kernarg_segment_ptr:
  case Intrinsic::amdgcn_s_getpc:
  case Intrinsic::amdgcn_groupstaticsize:
  case Intrinsic::amdgcn_reloc_constant:
  case Intrinsic::returnaddress: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case Intrinsic::amdgcn_wqm_vote: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = OpdsMapping[2] =
        AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
    break;
  }
  case Intrinsic::amdgcn_ps_live: {
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
    break;
  }
  case Intrinsic::amdgcn_div_scale: {
    unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);

    unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
    OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
    break;
  }
  case Intrinsic::amdgcn_class: {
    Register Src0Reg = MI.getOperand(2).getReg();
    Register Src1Reg = MI.getOperand(3).getReg();
    unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
    unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
    break;
  }
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp: {
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // This is not VCCRegBank because this is not used in boolean contexts.
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
    unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
    break;
  }
  case Intrinsic::amdgcn_readlane: {
    // The lane index must be an SGPR, but accept a VGPR.
    Register IdxReg = MI.getOperand(3).getReg();
    unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
    unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
    [[fallthrough]];
  }
  case Intrinsic::amdgcn_readfirstlane: {
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
    break;
  }
  case Intrinsic::amdgcn_writelane: {
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    Register SrcReg = MI.getOperand(2).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    unsigned SrcBank = getRegBankID(SrcReg, MRI, AMDGPU::SGPRRegBankID);
    Register IdxReg = MI.getOperand(3).getReg();
    unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
    unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

    // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
    // to legalize.
    OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
    OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
    break;
  }
  case Intrinsic::amdgcn_if_break: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case Intrinsic::amdgcn_permlane16:
  case Intrinsic::amdgcn_permlanex16: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    // The two lane-select operands must be uniform.
    OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
    OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
    break;
  }
  case Intrinsic::amdgcn_permlane16_var:
  case Intrinsic::amdgcn_permlanex16_var: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    break;
  }
  case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
  case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
  case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
  case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
  case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
  case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
  case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
  case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
  case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
  case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
  case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
  case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
  case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
  case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
  case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
  case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
  case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
  case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
  case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
  case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
  case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
  case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
  case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
  case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
  case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
  case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
  case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
  case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
  case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
  case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
  case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
  case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
  case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
  case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
  case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
  case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
  case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
  case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
  case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8:
  case Intrinsic::amdgcn_mfma_f32_16x16x32_f16:
  case Intrinsic::amdgcn_mfma_f32_32x32x16_f16:
  case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
  case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
  case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
    // Default for MAI intrinsics.
    // srcC can also be an immediate which can be folded later.
    // vdst, srcA, srcB, srcC
    const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    OpdsMapping[0] =
        Info->mayNeedAGPRs()
            ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
            : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
    OpdsMapping[4] =
        Info->mayNeedAGPRs()
            ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
            : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
    break;
  }
  case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
  case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
    const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    OpdsMapping[0] =
        Info->mayNeedAGPRs()
            ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
            : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
    OpdsMapping[4] =
        Info->mayNeedAGPRs()
            ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
            : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
    // The remaining scale operands are likewise VALU sources.
    break;
  }
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
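    // The sparse (smfmac) variants are mapped like the dense MFMA cases above,
    // with the extra per-lane sparsity-index operand in a VGPR.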
  case Intrinsic::amdgcn_interp_p1:
  case Intrinsic::amdgcn_interp_p2:
  case Intrinsic::amdgcn_interp_mov:
  case Intrinsic::amdgcn_interp_p1_f16:
  case Intrinsic::amdgcn_interp_p2_f16:
  case Intrinsic::amdgcn_lds_param_load: {
    const int M0Idx = MI.getNumOperands() - 1;
    Register M0Reg = MI.getOperand(M0Idx).getReg();
    unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
    for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
      OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

    // Must be SGPR, but we must take whatever the original bank is and fix it
    // later.
    OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
    break;
  }
  case Intrinsic::amdgcn_interp_inreg_p10:
  case Intrinsic::amdgcn_interp_inreg_p2:
  case Intrinsic::amdgcn_interp_inreg_p10_f16:
  case Intrinsic::amdgcn_interp_inreg_p2_f16:
  case Intrinsic::amdgcn_interp_p10_rtz_f16:
  case Intrinsic::amdgcn_interp_p2_rtz_f16: {
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
    OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
    break;
  }
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap: {
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
    break;
  }
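    // amdgcn_ballot builds a wave-wide mask from a per-lane condition, so the
    // destination is uniform (SGPR) while the source is a VCC boolean.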
  case Intrinsic::amdgcn_ballot: {
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
    break;
  }
  case Intrinsic::amdgcn_inverse_ballot: {
    // The mask must be an SGPR, but accept a VGPR.
    Register MaskReg = MI.getOperand(2).getReg();
    unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
    unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
    OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
    break;
  }
  case Intrinsic::amdgcn_bitop3: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    break;
  }
  case Intrinsic::amdgcn_s_quadmask:
  case Intrinsic::amdgcn_s_wqm: {
    Register MaskReg = MI.getOperand(2).getReg();
    unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
    unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
    break;
  }
  case Intrinsic::amdgcn_wave_reduce_umin:
  case Intrinsic::amdgcn_wave_reduce_umax: {
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
    unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned regBankID =
        isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
    break;
  }
  case Intrinsic::amdgcn_s_bitreplicate:
    Register MaskReg = MI.getOperand(2).getReg();
    unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
    OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
    }
    break;
  }
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    auto IntrID = AMDGPU::getIntrinsicID(MI);
    const AMDGPU::RsrcIntrinsic *RSrcIntrin = AMDGPU::lookupRsrcIntrinsic(IntrID);
    assert(RSrcIntrin && "missing RsrcIntrinsic for image intrinsic");
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
    unsigned N = MI.getNumExplicitOperands() - 2;
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
    OpdsMapping[N] = getSGPROpMapping(MI.getOperand(N).getReg(), MRI, *TRI);
    if (N == 3) {
      // Sequential form: all operands combined into a single VGPR tuple.
      unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    } else {
      // NSA form: each operand is a separate VGPR.
      for (unsigned I = 2; I < N; ++I) {
        unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
        OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      }
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
    auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrID) {
    case Intrinsic::amdgcn_s_getreg:
    case Intrinsic::amdgcn_s_memtime:
    case Intrinsic::amdgcn_s_memrealtime:
    case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
    case Intrinsic::amdgcn_s_sendmsg_rtn: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_global_atomic_csub:
    case Intrinsic::amdgcn_global_atomic_fmin_num:
    case Intrinsic::amdgcn_global_atomic_fmax_num:
    case Intrinsic::amdgcn_flat_atomic_fmin_num:
    case Intrinsic::amdgcn_flat_atomic_fmax_num:
    case Intrinsic::amdgcn_atomic_cond_sub_u32:
    case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
    case Intrinsic::amdgcn_global_load_tr_b64:
    case Intrinsic::amdgcn_global_load_tr_b128:
    case Intrinsic::amdgcn_ds_read_tr4_b64:
    case Intrinsic::amdgcn_ds_read_tr6_b96:
    case Intrinsic::amdgcn_ds_read_tr8_b64:
    case Intrinsic::amdgcn_ds_read_tr16_b64:
      return getDefaultMappingAllVGPR(MI);
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
      unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
                                     AMDGPU::SGPRRegBankID);
      OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      break;
    }
    case Intrinsic::amdgcn_ds_append:
    case Intrinsic::amdgcn_ds_consume: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
      break;
    }
    case Intrinsic::amdgcn_exp_compr:
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      break;
    case Intrinsic::amdgcn_exp:
      // FIXME: Could we support packed types here?
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      break;
    case Intrinsic::amdgcn_exp_row:
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[8] = getSGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);
      break;
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_s_setreg: {
      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_s_ttracedata: {
      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_end_cf: {
      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_else: {
      unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
      break;
    }
    case Intrinsic::amdgcn_init_whole_wave:
    case Intrinsic::amdgcn_live_mask: {
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      break;
    }
    case Intrinsic::amdgcn_wqm_demote:
    case Intrinsic::amdgcn_kill: {
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      break;
    }
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_ptr_buffer_load:
    case Intrinsic::amdgcn_raw_atomic_buffer_load:
    case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
    case Intrinsic::amdgcn_raw_tbuffer_load:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
    case Intrinsic::amdgcn_raw_buffer_load_lds:
    case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
    case Intrinsic::amdgcn_raw_buffer_store:
    case Intrinsic::amdgcn_raw_ptr_buffer_store:
    case Intrinsic::amdgcn_raw_buffer_store_format:
    case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
    case Intrinsic::amdgcn_raw_tbuffer_store:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_ptr_buffer_load:
    case Intrinsic::amdgcn_struct_tbuffer_load:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
    case Intrinsic::amdgcn_struct_atomic_buffer_load:
    case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
    case Intrinsic::amdgcn_struct_buffer_load_lds:
    case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
    case Intrinsic::amdgcn_struct_buffer_store:
    case Intrinsic::amdgcn_struct_ptr_buffer_store:
    case Intrinsic::amdgcn_struct_tbuffer_store:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
    case Intrinsic::amdgcn_init_exec_from_input: {
      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_ds_gws_init:
    case Intrinsic::amdgcn_ds_gws_barrier:
    case Intrinsic::amdgcn_ds_gws_sema_br: {
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_ds_gws_sema_v:
    case Intrinsic::amdgcn_ds_gws_sema_p:
    case Intrinsic::amdgcn_ds_gws_sema_release_all: {
      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_global_load_lds: {
      OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      break;
    }
    case Intrinsic::amdgcn_lds_direct_load: {
      const int M0Idx = MI.getNumOperands() - 1;
      Register M0Reg = MI.getOperand(M0Idx).getReg();
      unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
      for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
        OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

      // Must be SGPR, but we must take whatever the original bank is and fix
      // it later.
      OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
    case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
      OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
      OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      break;
    case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
    case Intrinsic::amdgcn_s_sleep_var:
      OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
      break;
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_wakeup_barrier:
      OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
      break;
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_signal_var:
      OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      break;
    case Intrinsic::amdgcn_s_barrier_signal_isfirst: {
      const unsigned ResultSize = 1;
      OpdsMapping[0] =
          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
      break;
    }
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_get_named_barrier_state: {
      OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
      break;
    }
    case Intrinsic::amdgcn_pops_exiting_wave_id:
      return getDefaultMappingSOP(MI);
    case Intrinsic::amdgcn_s_prefetch_data: {
      OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      break;
    }
    default:
      return getInvalidInstructionMapping();
    }
    break;
  }
  case AMDGPU::G_SELECT: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
                                    AMDGPU::SGPRRegBankID);
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI,
                                    AMDGPU::SGPRRegBankID);
    bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
                    Op3Bank == AMDGPU::SGPRRegBankID;

    unsigned CondBankDefault = SGPRSrcs ?
      AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
    unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI,
                                     CondBankDefault);
    if (CondBank == AMDGPU::SGPRRegBankID)
      CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
    else if (CondBank == AMDGPU::VGPRRegBankID)
      CondBank = AMDGPU::VCCRegBankID;

    unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);

    // 64-bit selects on the VALU are split into 32-bit halves.
    if (Size == 64) {
      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
      OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
      OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
    } else {
      OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
      OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
    }
    break;
  }
  case AMDGPU::G_SI_CALL: {
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
    // Lie and claim everything is legal, even though some need to be
    // SGPRs. applyMapping will have to deal with it as a waterfall loop.
    OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

    // Allow anything for implicit arguments.
    for (unsigned I = 4; I < MI.getNumOperands(); ++I) {
      if (MI.getOperand(I).isReg()) {
        Register Reg = MI.getOperand(I).getReg();
        auto OpBank = getRegBankID(Reg, MRI);
        unsigned Size = getSizeInBits(Reg, MRI, *TRI);
        OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);
      }
    }
    break;
  }
  case AMDGPU::G_LOAD:
  case AMDGPU::G_ZEXTLOAD:
  case AMDGPU::G_SEXTLOAD:
    return getInstrMappingForLoad(MI);

  case AMDGPU::G_ATOMICRMW_XCHG:
  case AMDGPU::G_ATOMICRMW_ADD:
  case AMDGPU::G_ATOMICRMW_SUB:
  case AMDGPU::G_ATOMICRMW_AND:
  case AMDGPU::G_ATOMICRMW_OR:
  case AMDGPU::G_ATOMICRMW_XOR:
  case AMDGPU::G_ATOMICRMW_MAX:
  case AMDGPU::G_ATOMICRMW_MIN:
  case AMDGPU::G_ATOMICRMW_UMAX:
  case AMDGPU::G_ATOMICRMW_UMIN:
  case AMDGPU::G_ATOMICRMW_FADD:
  case AMDGPU::G_ATOMICRMW_FMIN:
  case AMDGPU::G_ATOMICRMW_FMAX:
  case AMDGPU::G_ATOMICRMW_UINC_WRAP:
  case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
  case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
    OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
    OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    break;
  }
  case AMDGPU::G_ATOMIC_CMPXCHG: {
    OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
    OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
    break;
  }
  case AMDGPU::G_BRCOND: {
    unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI,
                                 AMDGPU::SGPRRegBankID);
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
    if (Bank != AMDGPU::SGPRRegBankID)
      Bank = AMDGPU::VCCRegBankID;
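    // A uniform condition can branch on SCC (SGPR bank); any other condition
    // is treated as a divergent VCC boolean and the branch is lowered through
    // the exec mask, e.g. (illustrative MIR):
    //   G_BRCOND %cond:vcc(s1), %bb.1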
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
    break;
  }
  case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
    return getDefaultMappingVOP(MI);
  case AMDGPU::G_PREFETCH:
    OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    break;
  }

  return getInstructionMapping(/*ID*/1, /*Cost*/1,
                               getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
static bool substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)
static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
static Register constrainRegToBank(MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &Reg, const RegisterBank &Bank)
static std::pair< Register, Register > unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode)
static void extendLow32IntoHigh32(MachineIRBuilder &B, Register Hi32Reg, Register Lo32Reg, unsigned ExtOpc, const RegisterBank &RegBank, bool IsBooleanSrc=false)
Implement extending a 32-bit value to a 64-bit value.
static unsigned getExtendOp(unsigned Opc)
static bool isVectorRegisterBank(const RegisterBank &Bank)
static unsigned regBankUnion(unsigned RB0, unsigned RB1)
static std::pair< LLT, LLT > splitUnequalType(LLT Ty, unsigned FirstSize)
Split Ty into 2 pieces.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, unsigned ConstOffset)
Utility function for pushing dynamic vector indexes with a constant offset into waterfall loops.
static LLT widen96To128(LLT Ty)
static LLT getHalfSizedType(LLT Ty)
static unsigned getSBufferLoadCorrespondingBufferLoadOpcode(unsigned Opc)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static bool isReg(const MCInst &MI, unsigned OpNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static constexpr Register SPReg
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isUniformMMO(const MachineMemOperand *MMO)
bool applyMappingDynStackAlloc(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
std::pair< Register, unsigned > splitBufferOffsets(MachineIRBuilder &B, Register Offset) const
bool collectWaterfallOperands(SmallSet< Register, 4 > &SGPROperandRegs, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef< unsigned > OpIndices) const
const InstructionMapping & getImageMapping(const MachineRegisterInfo &MRI, const MachineInstr &MI, int RsrcIdx) const
InstructionMappings addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI, const std::array< unsigned, NumOps > RegSrcOpIdx, ArrayRef< OpRegBankEntry< NumOps > > Table) const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const
bool executeInWaterfallLoop(MachineIRBuilder &B, iterator_range< MachineBasicBlock::iterator > Range, SmallSet< Register, 4 > &SGPROperandRegs) const
Legalize instruction MI where operands in OpIndices must be SGPRs.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
AMDGPURegisterBankInfo(const GCNSubtarget &STI)
bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI, unsigned Default=AMDGPU::VGPRRegBankID) const
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const
Handle register layout difference for f16 images for some subtargets.
const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const
void applyMappingImpl(MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const override
See RegisterBankInfo::applyMapping.
bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper, bool Signed) const
bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI, const OperandsMapper &OpdMapper, int RSrcIdx) const
const ValueMapping * getVGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool isScalarLoadLegal(const MachineInstr &MI) const
unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg, Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const
const ValueMapping * getSGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
void split64BitValueForMapping(MachineIRBuilder &B, SmallVector< Register, 2 > &Regs, LLT HalfTy, Register Reg) const
Split 64-bit value Reg into two 32-bit halves and populate them into Regs.
const ValueMapping * getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const
Return the mapping for a pointer argument.
unsigned getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsic(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isDivergentRegBank(const RegisterBank *RB) const override
Returns true if the register bank is considered divergent.
void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const InstructionMapping & getDefaultMappingSOP(const MachineInstr &MI) const
const InstructionMapping & getDefaultMappingAllVGPR(const MachineInstr &MI) const
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
const ValueMapping * getAGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
const GCNSubtarget & Subtarget
const InstructionMapping & getDefaultMappingVOP(const MachineInstr &MI) const
bool isSALUMapping(const MachineInstr &MI) const
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const
bool applyMappingSBufferLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
void applyMappingSMULU64(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
const SIRegisterInfo * TRI
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool hasScalarCompareEq64() const
bool hasScalarSubwordLoads() const
bool hasFullRate64Ops() const
bool hasScalarDwordx3Loads() const
bool hasScalarMulHiInsts() const
bool hasPseudoScalarTrans() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasSALUFloatInsts() const
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void createdInstr(MachineInstr &MI)=0
An instruction has been created and inserted into the function.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT getScalarType() const
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
constexpr LLT divide(int Factor) const
Return a type that is Factor times smaller.
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
TypeSize getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value,.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Helper class that represents how the value of an instruction may be mapped and what is the related co...
bool isValid() const
Check whether this object is valid.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
const InstructionMapping & getInstrMapping() const
The final mapping of the instruction.
MachineInstr & getMI() const
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
const unsigned * Sizes
Hold the sizes of the register banks for all HwModes.
bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, TypeSize Size) const
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp/select instructions.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
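A short sketch of the count/insert idiom; the element type and the Worklist input are illustrative:
  #include "llvm/ADT/SmallSet.h"
  // Deduplicate registers while walking a worklist; insert(...).second
  // is false when the value was already present.
  llvm::SmallSet<llvm::Register, 4> Seen;
  for (llvm::Register R : Worklist) {
    if (!Seen.insert(R).second)
      continue; // already processed
  }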
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static IntegerType * getInt32Ty(LLVMContext &C)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of the scalar value RHS.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
self_iterator getIterator()
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory (GDS).
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
bool isFlatGlobalAddrSpace(unsigned AS)
bool isExtendedGlobalAddrSpace(unsigned AS)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelKnownBits *KnownBits=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
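A hedged sketch of a typical caller; Reg and MRI are assumed from context, and the AMDGPU:: qualification is an assumption based on this file's namespace usage:
  // Decompose an address-like vreg into base + constant offset.
  auto [Base, Offset] = AMDGPU::getBaseWithConstantOffset(MRI, Reg);
  if (Offset != 0) {
    // Base feeds the address operand; Offset can go in an immediate field.
  }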
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
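Together these matchers support a declarative style. A minimal sketch, with Dst and MRI assumed from context; note that m_GAdd is commutative, so the constant may sit on either side:
  Register Base;
  APInt Cst;
  // Match "Dst = G_ADD Base, <constant>" and capture both pieces.
  if (mi_match(Dst, MRI, m_GAdd(m_Reg(Base), m_ICst(Cst)))) {
    // Base and Cst are now bound to the matched operands.
  }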
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
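A short sketch, assuming Reg and MRI from the enclosing code:
  // Look through trivial defs for a G_CONSTANT that produces Reg.
  if (MachineInstr *Def = getOpcodeDef(TargetOpcode::G_CONSTANT, Reg, MRI)) {
    // Def is the defining G_CONSTANT; read its immediate operand here.
  }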
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands to the instruction's register class.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in an int64_t, returns it.
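A sketch of the typical pattern; VReg and MRI are assumed from context:
  if (std::optional<int64_t> Imm = getIConstantVRegSExtVal(VReg, MRI)) {
    // *Imm holds the sign-extended constant; fold it as an immediate.
  }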
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of the kernel to the load instruction.
auto reverse(ContainerTy &&C)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
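A minimal sketch of the idiom:
  #include "llvm/Support/Threading.h" // llvm::once_flag, llvm::call_once
  static llvm::once_flag InitFlag;
  llvm::call_once(InitFlag, [] {
    // Body runs exactly once, even with concurrent callers.
  });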
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
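A tiny worked example combining the two helpers above:
  Align A = assumeAligned(16); // assumeAligned(0) would yield Align(1)
  unsigned Shift = Log2(A);    // 16 == 1 << 4, so Shift == 4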
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
const RegisterBank * RegBank
Register bank where the partial value lives.
unsigned Length
Length of this mapping in bits.
Helper struct that represents how a value is mapped through different register banks.
unsigned NumBreakDowns
Number of partial mappings used to break down this value.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
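As an illustrative sketch using the fields above (constructor argument order assumed), a 64-bit value split across two 32-bit VGPR halves would be described as:
  // Two 32-bit pieces covering bits [0,32) and [32,64) of the value.
  RegisterBankInfo::PartialMapping Parts[2] = {
      {/*StartIdx=*/0, /*Length=*/32, AMDGPU::VGPRRegBank},
      {/*StartIdx=*/32, /*Length=*/32, AMDGPU::VGPRRegBank}};
  RegisterBankInfo::ValueMapping VM(/*BreakDown=*/Parts, /*NumBreakDowns=*/2);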
The llvm::once_flag structure.