#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;
RegBankLegalizeHelper::RegBankLegalizeHelper(
    MachineIRBuilder &B, const MachineUniformityInfo &MUI,
    const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
    : // Earlier member initializers (ST, B, MRI, ...) are elided in this
      // excerpt.
      MUI(MUI), RBI(RBI), RBLRules(RBLRules), IsWave32(ST.isWave32()),
      SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
      VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
      VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
// Tail of findRuleAndApplyMapping(MI): after the dst/src mappings are
// applied, dispatch to the lowering selected by the matched rule.
  lower(MI, Mapping, WaterfallSgprs);
}
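// Rough shape of the waterfall loop built below (a sketch, not verbatim MIR):
// for each divergent register feeding an operand that must be an SGPR, pick
// the value held by the first active lane, restrict EXEC to the lanes that
// hold the same value, run the instruction(s), then clear those lanes and
// repeat until EXEC is 0:
//
//   loop:
//     %cur:sgpr = readfirstlane %op:vgpr
//     %cond:vcc = icmp eq %cur, %op      ; per 32/64-bit part, AND-ed together
//     %mask     = ballot %cond
//     %saved    = s_and_saveexec %mask
//     ...                                ; waterfalled instruction(s)
//     exec     ^= %saved                 ; s_xor_*_term
//     SI_WATERFALL_LOOP loop             ; loop back while exec != 0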
bool RegBankLegalizeHelper::executeInWaterfallLoop(
    MachineIRBuilder &B, iterator_range<MachineBasicBlock::iterator> Range,
    SmallSet<Register, 4> &SGPROperandRegs) {
  // Pick the wave-size-dependent exec-mask opcodes and the exec register.
  unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
  if (IsWave32) {
    MovExecOpc = AMDGPU::S_MOV_B32;
    MovExecTermOpc = AMDGPU::S_MOV_B32_term;
    XorTermOpc = AMDGPU::S_XOR_B32_term;
    AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
    ExecReg = AMDGPU::EXEC_LO;
  } else {
    MovExecOpc = AMDGPU::S_MOV_B64;
    MovExecTermOpc = AMDGPU::S_MOV_B64_term;
    XorTermOpc = AMDGPU::S_XOR_B64_term;
    AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
    ExecReg = AMDGPU::EXEC;
  }
  const int OrigRangeSize = std::distance(Range.begin(), Range.end());

  // WaveRC is the wave mask register class (set up earlier in the function).
  Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
  Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(InitSaveExecReg);

  Register SavedExec = MRI.createVirtualRegister(WaveRC);

  // Creation of LoopBB, BodyBB, RestoreExecBB and RemainderBB, and the
  // splicing of the original range into BodyBB (which defines NewBegin), is
  // elided in this excerpt.
  B.setInsertPt(*LoopBB, LoopBB->end());
  auto NewEnd = BodyBB->end();
  assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);

  for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
    for (MachineOperand &Op : MI.all_uses()) {
      Register OldReg = Op.getReg();
      if (!SGPROperandRegs.count(OldReg))
        continue;

      // See if we already processed this register in another instruction in
      // the sequence.
      auto OldVal = WaterfalledRegMap.find(OldReg);
      if (OldVal != WaterfalledRegMap.end()) {
        Op.setReg(OldVal->second);
        continue;
      }

      Register OpReg = Op.getReg();
      LLT OpTy = MRI.getType(OpReg);

      // Pick the value held by the first active lane.
      Register CurrentLaneReg = MRI.createVirtualRegister({SgprRB, OpTy});
      buildReadFirstLane(B, CurrentLaneReg, OpReg, RBI);
      // Compare in 64-bit parts when the size allows it, else in 32-bit
      // parts.
      unsigned OpSize = OpTy.getSizeInBits();
      unsigned PartSize = (OpSize % 64 == 0) ? 64 : 32;
      LLT PartTy = LLT::scalar(PartSize);
      unsigned NumParts = OpSize / PartSize;
      SmallVector<Register, 8> OpParts;
      SmallVector<Register, 8> CurrentLaneParts;

      if (NumParts == 1) {
        OpParts.push_back(OpReg);
        CurrentLaneParts.push_back(CurrentLaneReg);
      } else {
        auto UnmergeOp = B.buildUnmerge({VgprRB, PartTy}, OpReg);
        auto UnmergeCurrLane = B.buildUnmerge({SgprRB, PartTy}, CurrentLaneReg);
        for (unsigned i = 0; i < NumParts; ++i) {
          OpParts.push_back(UnmergeOp.getReg(i));
          CurrentLaneParts.push_back(UnmergeCurrLane.getReg(i));
        }
      }

      // CondReg (declared in elided code) accumulates the per-part compares.
      for (unsigned i = 0; i < NumParts; ++i) {
        Register CmpReg = MRI.createVirtualRegister(VccRB_S1);
        B.buildICmp(CmpInst::ICMP_EQ, CmpReg, CurrentLaneParts[i], OpParts[i]);

        if (!CondReg.isValid())
          CondReg = CmpReg;
        else
          CondReg = B.buildAnd(VccRB_S1, CondReg, CmpReg).getReg(0);
      }

      Op.setReg(CurrentLaneReg);

      // Make sure we don't re-process this register again.
      WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));
    }
  }
  // Turn the vcc condition into a wave-width lane mask and use it to mask
  // EXEC, remembering the previous EXEC value.
  Register CondRegLM =
      MRI.createVirtualRegister({WaveRC, LLT::scalar(IsWave32 ? 32 : 64)});
  B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);

  // Update EXEC, save the original EXEC value to SavedExec.
  B.buildInstr(AndSaveExecOpc)
      .addDef(SavedExec)
      .addReg(CondRegLM, RegState::Kill);
  MRI.setSimpleHint(SavedExec, CondRegLM);
  B.setInsertPt(*BodyBB, BodyBB->end());

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec);

  B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);

  // Save the EXEC mask before the loop (the insert point is moved to the
  // block preceding LoopBB in elided code).
  B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg);

  // Restore the EXEC mask after the loop.
  B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
  B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg);

  // Set the insert point after the original range, in the remainder block.
  B.setInsertPt(*RemainderBB, RemainderBB->begin());

  return true;
}
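// Resulting control flow (sketch): the original block ends with the S_MOV
// that saves EXEC and falls through into LoopBB; BodyBB holds the waterfalled
// range and ends with the S_XOR_*_term plus the SI_WATERFALL_LOOP back-edge
// to LoopBB; RestoreExecBB re-installs SaveExecReg into EXEC and falls
// through into RemainderBB, which keeps the rest of the original block.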
void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
                                      ArrayRef<LLT> LLTBreakdown,
                                      LLT MergeTy) {
  MachineFunction &MF = B.getMF();
  assert(MI.getNumMemOperands() == 1);
  MachineMemOperand &BaseMMO = **MI.memoperands_begin();
  Register Dst = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
  Register Base = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(Base);
  const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base);
  LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
  SmallVector<Register, 4> LoadPartRegs;

  unsigned ByteOffset = 0;
  for (LLT PartTy : LLTBreakdown) {
    Register BasePlusOffset;
    if (ByteOffset == 0) {
      BasePlusOffset = Base;
    } else {
      auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
      BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
    }
    MachineMemOperand *OffsetMMO =
        MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
    auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
    LoadPartRegs.push_back(LoadPart.getReg(0));
    ByteOffset += PartTy.getSizeInBytes();
  }
  if (!MergeTy.isValid()) {
    // All parts have the same size: concatenate them directly.
    B.buildMergeLikeInstr(Dst, LoadPartRegs);
  } else {
    // Parts have different sizes: unmerge the larger ones into MergeTy pieces
    // first, then merge all pieces into Dst.
    SmallVector<Register, 4> MergeTyParts;
    for (Register Reg : LoadPartRegs) {
      if (MRI.getType(Reg) == MergeTy) {
        MergeTyParts.push_back(Reg);
      } else {
        auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg);
        for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
          MergeTyParts.push_back(Unmerge.getReg(i));
      }
    }
    B.buildMergeLikeInstr(Dst, MergeTyParts);
  }
  MI.eraseFromParent();
}
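// Example (matches the SplitLoad cases in lower() below): an S96 load is
// split as splitLoad(MI, {S64, S32}, S32), i.e. an S64 load at byte offset 0
// and an S32 load at byte offset 8, whose pieces are merged back into the
// original S96 def through S32 parts.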
void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
                                      LLT MergeTy) {
  MachineFunction &MF = B.getMF();
  assert(MI.getNumMemOperands() == 1);
  MachineMemOperand &BaseMMO = **MI.memoperands_begin();
  Register Dst = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
  Register Base = MI.getOperand(1).getReg();

  MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy);
  auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO);

  if (WideTy.isScalar()) {
    B.buildTrunc(Dst, WideLoad);
  } else {
    SmallVector<Register, 4> MergeTyParts;
    auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);

    LLT DstTy = MRI.getType(Dst);
    unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits();
    for (unsigned i = 0; i < NumElts; ++i) {
      MergeTyParts.push_back(Unmerge.getReg(i));
    }
    B.buildMergeLikeInstr(Dst, MergeTyParts);
  }
  MI.eraseFromParent();
}
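// widenLoad() instead performs one wider load: V3S32 is loaded as V4S32 and
// only the first three elements are merged into the original def (scalar wide
// types are simply truncated). Note the wide MMO: the rules are expected to
// pick WidenLoad only where reading the extra bytes is safe, e.g. for
// constant-address-space loads.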
void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  Register Src = MI.getOperand(1).getReg();
  unsigned Opc = MI.getOpcode();
  int TrueExtCst = Opc == G_SEXT ? -1 : 1;
  if (Ty == S32 || Ty == S16) {
    auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
    auto False = B.buildConstant({VgprRB, Ty}, 0);
    B.buildSelect(Dst, Src, True, False);
  } else if (Ty == S64) {
    auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
    auto False = B.buildConstant({VgprRB_S32}, 0);
    auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
    MachineInstrBuilder Hi;
    switch (Opc) {
    case G_SEXT:
      Hi = Lo;
      break;
    case G_ZEXT:
      Hi = False;
      break;
    case G_ANYEXT:
      Hi = B.buildUndef({VgprRB_S32});
      break;
    default:
      llvm_unreachable("Opcode not supported");
    }
    B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
  } else {
    llvm_unreachable("Type not supported");
  }
  MI.eraseFromParent();
}
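// Example: a divergent `%d:vgpr(s32) = G_SEXT %c:vcc(s1)` becomes
// `%d = G_SELECT %c, -1, 0`; for S64 only the low half is selected and the
// high half follows the opcode (Lo again for sext, zero for zext, undef for
// anyext).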
std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
  auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
  auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
  auto Hi =
      B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
  return {Lo.getReg(0), Hi.getReg(0)};
}
std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
  auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
  auto Hi =
      B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
  return {Lo.getReg(0), Hi.getReg(0)};
}
std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
  // For any-extend the low half can be used as-is.
  auto Lo = PackedS32;
  auto Hi =
      B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
  return {Lo.getReg(0), Hi.getReg(0)};
}
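// The three unpack helpers above split a packed V2S16 (bitcast to S32) into
// two S32 halves with extension semantics matching the shift that will
// consume them: unpackZExt masks the low half, unpackSExt sign-extends it in
// register, unpackAExt uses it as-is; the high half is an arithmetic shift
// right by 16 for the signed variant and a logical one otherwise.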
void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
  Register Lo, Hi;
  switch (MI.getOpcode()) {
  case AMDGPU::G_SHL: {
    auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
    auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
    Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
    Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
    break;
  }
  case AMDGPU::G_LSHR: {
    auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
    auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
    Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
    Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
    break;
  }
  case AMDGPU::G_ASHR: {
    auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
    auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
    Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
    Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
    break;
  }
  default:
    llvm_unreachable("Unpack lowering not implemented");
  }
  B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
  MI.eraseFromParent();
}
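// Example: a uniform `%r:sgpr(v2s16) = G_LSHR %val, %amt` has no SALU
// equivalent (scalar shifts are 32/64-bit only), so both operands are
// zero-unpacked, shifted as two S32 operations, and repacked with
// G_BUILD_VECTOR_TRUNC.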
static bool isSignedBFE(MachineInstr &MI) {
  if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
    return (GI->is(Intrinsic::amdgcn_sbfe));
  return MI.getOpcode() == AMDGPU::G_SBFX;
}
void RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  bool Signed = isSignedBFE(MI);
  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
  // Extract the bitfield from Src: LSBit is the least significant bit of the
  // field and Width is its size.
  Register Src = MI.getOperand(FirstOpnd).getReg();
  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
  Register Width = MI.getOperand(FirstOpnd + 2).getReg();

  // There is no 64-bit VALU bitfield-extract instruction, so shift the field
  // down first.
  unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
  auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});

  auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);

  // Non-constant width: clear the bits above the field with a pair of 64-bit
  // shifts by 64 - Width.
  if (!ConstWidth) {
    auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
    auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
    B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
    MI.eraseFromParent();
    return;
  }

  // Constant width: use a 32-bit bitfield extract on one of the halves.
  uint64_t WidthImm = ConstWidth->Value.getZExtValue();
  auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
  Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
  Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
  auto Zero = B.buildConstant({VgprRB, S32}, 0);
  unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;

  if (WidthImm <= 32) {
    // The field fits in the low half; the high half is all sign/zero bits.
    auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
    MachineInstrBuilder Hi;
    if (Signed) {
      // Replicate the sign bit of Lo into the high half.
      Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
    } else {
      Hi = Zero;
    }
    B.buildMergeLikeInstr(Dst, {Lo, Hi});
  } else {
    // The low half is used whole; extract the remaining WidthImm - 32 bits
    // from the high half.
    auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
    auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
    B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
  }
  MI.eraseFromParent();
}
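// Worked example (constant width, signed, WidthImm <= 32): for
// `G_SBFX %dst:s64, %src, %lsb, 20` the expansion is a 64-bit G_ASHR by
// %lsb, an unmerge, a 32-bit G_SBFX extracting 20 bits from the low half,
// and a G_ASHR by 31 that replicates the sign bit into the high half.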
void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  bool Signed = isSignedBFE(MI);
  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
  Register Src = MI.getOperand(FirstOpnd).getReg();
  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
  Register Width = MI.getOperand(FirstOpnd + 2).getReg();

  // S_BFE's src1 packs the field offset in the low bits and the field width
  // above bit 16, so build it from LSBit and Width.
  auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
  auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
  auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
  auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
  unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
  unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
  unsigned Opc = Ty == S32 ? Opc32 : Opc64;

  // This builds a pre-selected machine instruction: because of register class
  // constraining, insert copies between register banks and register classes.
  auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
                            {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
  if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
                                        *ST.getRegisterInfo(), RBI))
    llvm_unreachable("failed to constrain S_BFE");

  B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
  MI.eraseFromParent();
}
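// Note the packed src1 operand built above: S_BFE takes the field offset in
// its low 6 bits (hence the maskTrailingOnes<unsigned>(6) mask) and the field
// width shifted up by 16 bits, OR-ed into a single S32 value.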
void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
  LLT Ty = DstTy == V4S16 ? V2S16 : S32;
  auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
  auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
  unsigned Opc = MI.getOpcode();
  auto Flags = MI.getFlags();
  auto Lo =
      B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
  auto Hi =
      B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
  B.buildMergeLikeInstr(Dst, {Lo, Hi});
  MI.eraseFromParent();
}
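// lowerSplitTo32() covers 64-bit VALU operations with no 64-bit encoding:
// both inputs are unmerged into S32 (or V2S16) halves, the opcode is repeated
// per half with the original MI flags, and the halves are merged into Dst.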
void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 ||
         (DstTy.isPointer() && DstTy.getSizeInBits() == 64));
  LLT Ty = DstTy == V4S16 ? V2S16 : S32;
  auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
  auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
  Register Cond = MI.getOperand(1).getReg();
  auto Flags = MI.getFlags();
  auto Lo =
      B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
  auto Hi =
      B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
  B.buildMergeLikeInstr(Dst, {Lo, Hi});
  MI.eraseFromParent();
}
void RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
  auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
  int Amt = MI.getOperand(2).getImm();
  Register Lo, Hi;
  if (Amt <= 32) {
    // The sign bit lives in the low half; Hi is derived from Lo.
    auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
    if (Amt == 32) {
      Lo = Freeze.getReg(0);
    } else {
      Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
    }
    auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
    Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0);
  } else {
    // The sign bit lives in the high half; the low half is unchanged.
    Lo = Op1.getReg(0);
    Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
  }
  B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
  MI.eraseFromParent();
}
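// A note on the G_FREEZE above: presumably it keeps a poison low half from
// being duplicated into both result halves, since Hi is derived from Lo by
// the arithmetic shift right by 31.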
void RegBankLegalizeHelper::lower(MachineInstr &MI,
                                  const RegBankLLTMapping &Mapping,
                                  SmallSet<Register, 4> &WaterfallSgprs) {
  // Only an excerpt of the lowering methods is shown here.
  switch (Mapping.LoweringMethod) {
  case VccExtToSel:
    return lowerVccExtToSel(MI);
  case UniExtToSel: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    auto True = B.buildConstant({SgprRB, Ty},
                                MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
    auto False = B.buildConstant({SgprRB, Ty}, 0);
    // The S1 condition was already any-extended to S32 and masked with 1, so
    // a plain select is sufficient here.
    B.buildSelect(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), True,
                  False);
    MI.eraseFromParent();
    return;
  }
  case UnpackBitShift:
    return lowerUnpackBitShift(MI);
  case Ext32To64: {
    const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
    MachineInstrBuilder Hi;
    switch (MI.getOpcode()) {
    case AMDGPU::G_ZEXT: {
      Hi = B.buildConstant({RB, S32}, 0);
      break;
    }
    case AMDGPU::G_SEXT: {
      // Replicate sign bit from 32-bit extended part.
      auto ShiftAmt = B.buildConstant({RB, S32}, 31);
      Hi = B.buildAShr({RB, S32}, MI.getOperand(1).getReg(), ShiftAmt);
      break;
    }
    case AMDGPU::G_ANYEXT: {
      Hi = B.buildUndef({RB, S32});
      break;
    }
    default:
      llvm_unreachable("Unsupported opcode in Ext32To64");
    }

    B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
                          {MI.getOperand(1).getReg(), Hi});
    MI.eraseFromParent();
    return;
  }
  case UniCstExt: {
    uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
    B.buildConstant(MI.getOperand(0).getReg(), ConstVal);
    MI.eraseFromParent();
    return;
  }
  case VgprToVccCopy: {
    Register Src = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Src);
    // Take the lowest bit from each lane and put it in a lane mask. Clean the
    // high bits first, since the compare below compares all bits in the
    // register.
    Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty});
    if (Ty == S64) {
      auto Src64 = B.buildUnmerge(VgprRB_S32, Src);
      auto One = B.buildConstant(VgprRB_S32, 1);
      auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One);
      auto Zero = B.buildConstant(VgprRB_S32, 0);
      auto AndHi = B.buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
      B.buildMergeLikeInstr(BoolSrc, {AndLo, AndHi});
    } else {
      assert(Ty == S32 || Ty == S16);
      auto One = B.buildConstant({VgprRB, Ty}, 1);
      B.buildAnd(BoolSrc, Src, One);
    }
    auto Zero = B.buildConstant({VgprRB, Ty}, 0);
    B.buildICmp(CmpInst::ICMP_NE, MI.getOperand(0).getReg(), BoolSrc, Zero);
    MI.eraseFromParent();
    return;
  }
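  // The VgprToVccCopy case above materializes a real lane mask from a vgpr
  // "bool": the high bits are cleared first (AND with 1, or with 0 for the
  // high 32 bits of an S64) because the final icmp-ne-zero compares every bit
  // in the register, and its vcc result holds one bit per lane.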
  case V_BFE:
    return lowerV_BFE(MI);
  case S_BFE:
    return lowerS_BFE(MI);
  case SplitTo32:
    return lowerSplitTo32(MI);
  case SplitTo32Select:
    return lowerSplitTo32Select(MI);
  case SplitTo32SExtInReg:
    return lowerSplitTo32SExtInReg(MI);
  case SplitLoad: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = DstTy.getSizeInBits();
    // Loads wider than 128 bits are split evenly into 128-bit parts; the
    // construction of the 128-bit part type B128 is elided in this excerpt.
    if (Size > 128) {
      if (Size / 128 == 2)
        splitLoad(MI, {B128, B128});
      else if (Size / 128 == 4)
        splitLoad(MI, {B128, B128, B128, B128});
      else
        llvm_unreachable("SplitLoad type not supported for MI");
    } else if (DstTy == S96)
      splitLoad(MI, {S64, S32}, S32);
    else if (DstTy == V3S32)
      splitLoad(MI, {V2S32, S32}, S32);
    else if (DstTy == V6S16)
      splitLoad(MI, {V4S16, V2S16}, V2S16);
    else
      llvm_unreachable("SplitLoad type not supported for MI");
    break;
  }
  case WidenLoad: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    if (DstTy == S96)
      widenLoad(MI, S128);
    else if (DstTy == V3S32)
      widenLoad(MI, V4S32, S32);
    else if (DstTy == V6S16)
      widenLoad(MI, V8S16, V2S16);
    else
      llvm_unreachable("WidenLoad type not supported for MI");
    break;
  }
  // (remaining lowering methods elided in this excerpt)
  }

  if (!WaterfallSgprs.empty()) {
    MachineBasicBlock::iterator I = MI.getIterator();
    executeInWaterfallLoop(B, make_range(I, std::next(I)), WaterfallSgprs);
  }
}
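// Source operands recorded by applyMappingSrc() in WaterfallSgprs could not
// be made uniform with a copy, so lower() finishes by wrapping the single
// instruction range make_range(I, std::next(I)) in a waterfall loop.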
void RegBankLegalizeHelper::applyMappingDst(
    MachineInstr &MI, unsigned &OpIdx,
    const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs) {
  // Defs start from operand 0.
  for (; OpIdx < MethodIDs.size(); ++OpIdx) {
    if (MethodIDs[OpIdx] == None)
      continue;

    MachineOperand &Op = MI.getOperand(OpIdx);
    Register Reg = Op.getReg();
    LLT Ty = MRI.getType(Reg);

    switch (MethodIDs[OpIdx]) {
    // (only a few cases shown; the labels are reconstructed from context)

    // Uniform S1 result of an instruction that defines vcc: define a vcc
    // register instead, copy it to scc and truncate back to the original S1.
    case UniInVcc: {
      Register NewDst = MRI.createVirtualRegister(VccRB_S1);
      Op.setReg(NewDst);
      auto CopyS32_Vcc =
          B.buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
      B.buildTrunc(Reg, CopyS32_Vcc);
      break;
    }

    // Uniform result of a VALU-only instruction: define a vgpr register
    // instead and read it back to sgpr with a read-any-lane (the same pattern
    // is repeated for the other UniInVgpr* type IDs).
    case UniInVgprS32: {
      Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
      Op.setReg(NewVgprDst);
      buildReadAnyLane(B, Reg, NewVgprDst, RBI);
      break;
    }

    // S1/S16 results that live in a 32-bit sgpr: define S32 and truncate.
    case Sgpr32Trunc: {
      Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
      Op.setReg(NewDst);
      B.buildTrunc(Reg, NewDst);
      break;
    }
    }
  }
}
void RegBankLegalizeHelper::applyMappingSrc(
    MachineInstr &MI, unsigned &OpIdx,
    const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
    SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
  for (unsigned i = 0; i < MethodIDs.size(); ++OpIdx, ++i) {
    if (MethodIDs[i] == None || MethodIDs[i] == IntrId || MethodIDs[i] == Imm)
      continue;

    MachineOperand &Op = MI.getOperand(OpIdx);
    Register Reg = Op.getReg();
    LLT Ty = MRI.getType(Reg);
    const RegisterBank *RB = MRI.getRegBank(Reg);

    switch (MethodIDs[i]) {
    // (cases condensed in this excerpt: one representative label is shown for
    // each group of type IDs)

    // vcc: an sgpr S1 bool must be converted to a lane mask.
    case Vcc: {
      assert(RB == VccRB || RB == SgprRB);
      if (RB == SgprRB) {
        auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
        auto CopyVcc_Scc =
            B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
        Op.setReg(CopyVcc_Scc.getReg(0));
      }
      break;
    }

    // sgpr scalars, pointers and vectors: nothing to rewrite, just verify.
    case Sgpr32: {
      assert(Ty == getTyFromID(MethodIDs[i]));
      assert(RB == getRegBankFromID(MethodIDs[i]));
      break;
    }

    // sgpr B-types (size-only types):
    case SgprB32: {
      assert(Ty == getBTyFromID(MethodIDs[i], Ty));
      assert(RB == getRegBankFromID(MethodIDs[i]));
      break;
    }

    // vgpr scalars, pointers and vectors: insert a copy to vgpr if needed.
    case Vgpr32: {
      assert(Ty == getTyFromID(MethodIDs[i]));
      if (RB != VgprRB) {
        auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
        Op.setReg(CopyToVgpr.getReg(0));
      }
      break;
    }

    // vgpr B-types:
    case VgprB32: {
      assert(Ty == getBTyFromID(MethodIDs[i], Ty));
      if (RB != VgprRB) {
        auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
        Op.setReg(CopyToVgpr.getReg(0));
      }
      break;
    }

    // sgpr-required operand that is divergent: record it for a waterfall
    // loop instead of copying.
    case Sgpr32_WF: {
      assert(Ty == getTyFromID(MethodIDs[i]));
      if (RB != SgprRB)
        SgprWaterfallOperandRegs.insert(Reg);
      break;
    }

    case Sgpr32AExt: {
      auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
      Op.setReg(Aext.getReg(0));
      break;
    }

    // Any-extend to S32 and clear all bits except the lowest one; used for
    // 'Legalizer-legal' S1 booleans.
    case Sgpr32AExtBoolInReg: {
      auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
      auto Cst1 = B.buildConstant(SgprRB_S32, 1);
      auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
      Op.setReg(BoolInReg.getReg(0));
      break;
    }

    case Sgpr32SExt: {
      auto Sext = B.buildSExt(SgprRB_S32, Reg);
      Op.setReg(Sext.getReg(0));
      break;
    }

    case Sgpr32ZExt: {
      auto Zext = B.buildZExt({SgprRB, S32}, Reg);
      Op.setReg(Zext.getReg(0));
      break;
    }

    case Vgpr32SExt: {
      auto Sext = B.buildSExt({VgprRB, S32}, Reg);
      Op.setReg(Sext.getReg(0));
      break;
    }

    case Vgpr32ZExt: {
      auto Zext = B.buildZExt({VgprRB, S32}, Reg);
      Op.setReg(Zext.getReg(0));
      break;
    }
    }
  }
}
void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  if (Ty == LLT::scalar(1) && MUI.isUniform(Dst)) {
    B.setInsertPt(*MI.getParent(), MI.getParent()->getFirstNonPHI());

    Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
    MI.getOperand(0).setReg(NewDst);
    B.buildTrunc(Dst, NewDst);

    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      Register UseReg = MI.getOperand(i).getReg();

      auto DefMI = MRI.getVRegDef(UseReg)->getIterator();
      MachineBasicBlock *DefMBB = DefMI->getParent();

      B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));

      auto NewUse = B.buildAnyExt(SgprRB_S32, UseReg);
      MI.getOperand(i).setReg(NewUse.getReg(0));
    }

    return;
  }

  // All divergent S1 phis should have been lowered before this pass.
  if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst))
    llvm_unreachable("Make sure to run AMDGPUGlobalISelDivergenceLowering "
                     "before RegBankLegalize to lower lane mask(vcc) phis");

  // (handling of non-S1 phis is elided in this excerpt)
}
static bool verifyRegBankOnOperands(MachineInstr &MI, const RegisterBank *RB,
                                    MachineRegisterInfo &MRI,
                                    unsigned StartOpIdx, unsigned EndOpIdx) {
  for (unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
    if (MRI.getRegBankOrNull(MI.getOperand(i).getReg()) != RB)
      return false;
  }
  return true;
}
void RegBankLegalizeHelper::applyMappingTrivial(MachineInstr &MI) {
  const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
  unsigned NumDefs = MI.getNumDefs();
  unsigned NumOperands = MI.getNumOperands();

  assert(verifyRegBankOnOperands(MI, RB, MRI, 0, NumDefs - 1));
  if (RB == SgprRB)
    assert(verifyRegBankOnOperands(MI, RB, MRI, NumDefs, NumOperands - 1));

  if (RB == VgprRB) {
    B.setInstr(MI);
    for (unsigned i = NumDefs; i < NumOperands; ++i) {
      Register Reg = MI.getOperand(i).getReg();
      if (MRI.getRegBank(Reg) != RB) {
        auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
        MI.getOperand(i).setReg(Copy.getReg(0));
      }
    }
  }
}