2194 B.setInstrAndDebugLoc(
MI);
2195 unsigned Opc =
MI.getOpcode();
2198 case AMDGPU::G_CONSTANT:
2199 case AMDGPU::G_IMPLICIT_DEF: {
2201 LLT DstTy =
MRI.getType(DstReg);
2207 if (DstBank == &AMDGPU::VCCRegBank)
2210 if (DefRegs.
empty())
2213 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2216 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2218 MI.getOperand(0).setReg(NewDstReg);
2219 if (
Opc != AMDGPU::G_IMPLICIT_DEF) {
2220 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2221 MI.getOperand(1).setCImm(
2225 MRI.setRegBank(NewDstReg, *DstBank);
2226 B.buildTrunc(DefRegs[0], NewDstReg);
2229 case AMDGPU::G_PHI: {
2231 LLT DstTy =
MRI.getType(DstReg);
2238 if (DstBank == &AMDGPU::VCCRegBank) {
2245 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
2249 if (SrcBank != &AMDGPU::VCCRegBank) {
2254 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2255 MI.getOperand(
I).setReg(Copy.getReg(0));
2266 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2267 B.setInsertPt(
B.getMBB(),
MI);
2275 case AMDGPU::G_FCMP:
2279 case AMDGPU::G_ICMP:
2280 case AMDGPU::G_UADDO:
2281 case AMDGPU::G_USUBO:
2282 case AMDGPU::G_UADDE:
2283 case AMDGPU::G_SADDE:
2284 case AMDGPU::G_USUBE:
2285 case AMDGPU::G_SSUBE: {
2286 unsigned BoolDstOp =
2287 (
Opc == AMDGPU::G_ICMP ||
Opc == AMDGPU::G_FCMP) ? 0 : 1;
2288 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2292 if (DstBank != &AMDGPU::SGPRRegBank)
2295 const bool HasCarryIn =
MI.getNumOperands() == 5;
2301 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2302 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2306 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2307 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2308 MI.getOperand(4).setReg(NewSrcReg);
2312 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2317 if (DefRegs.
empty())
2319 B.buildTrunc(DefRegs[0], NewDstReg);
2322 case AMDGPU::G_SELECT: {
2324 LLT DstTy =
MRI.getType(DstReg);
2327 if (CondRegs.
empty())
2334 if (CondBank == &AMDGPU::SGPRRegBank) {
2337 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2339 MI.getOperand(1).setReg(NewCondReg);
2340 B.buildZExt(NewCondReg, CondRegs[0]);
2353 if (DefRegs.
empty()) {
2358 if (Src1Regs.
empty())
2364 if (Src2Regs.
empty())
2371 auto Flags =
MI.getFlags();
2372 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0], Flags);
2373 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1], Flags);
2375 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2376 MI.eraseFromParent();
2379 case AMDGPU::G_BRCOND: {
2380 Register CondReg =
MI.getOperand(0).getReg();
2385 if (CondBank == &AMDGPU::SGPRRegBank) {
2388 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2390 MI.getOperand(0).setReg(NewCondReg);
2391 B.buildZExt(NewCondReg, CondReg);
2399 case AMDGPU::G_XOR: {
2403 LLT DstTy =
MRI.getType(DstReg);
2409 if (DstBank == &AMDGPU::VCCRegBank)
2413 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2422 if (DstTy.
getSizeInBits() == 16 && DstBank == &AMDGPU::SGPRRegBank) {
2426 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2431 if (
MI.getOpcode() == AMDGPU::G_XOR &&
2452 if (DefRegs.
empty()) {
2459 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2465 if (Src0Regs.
empty())
2470 if (Src1Regs.
empty())
2477 auto Flags =
MI.getFlags();
2478 B.buildInstr(
Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]}, Flags);
2479 B.buildInstr(
Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]}, Flags);
2481 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2482 MI.eraseFromParent();
2485 case AMDGPU::G_ABS: {
2491 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2493 ApplyRegBankMapping Apply(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2506 case AMDGPU::G_LSHR:
2507 case AMDGPU::G_ASHR:
2508 case AMDGPU::G_SMIN:
2509 case AMDGPU::G_SMAX:
2510 case AMDGPU::G_UMIN:
2511 case AMDGPU::G_UMAX: {
2513 LLT DstTy =
MRI.getType(DstReg);
2518 if (!
Subtarget.hasVectorMulU64() &&
Opc == AMDGPU::G_MUL &&
2531 if (DstBank == &AMDGPU::VGPRRegBank)
2537 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2542 std::tie(WideSrcLo, WideSrcHi) =
2544 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcLo});
2545 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcHi});
2546 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2547 MI.eraseFromParent();
2556 std::tie(WideSrc0Lo, WideSrc0Hi)
2558 std::tie(WideSrc1Lo, WideSrc1Hi)
2560 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2561 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2562 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2563 MI.eraseFromParent();
2571 if (
Opc == AMDGPU::G_SHL ||
Opc == AMDGPU::G_LSHR ||
2572 Opc == AMDGPU::G_ASHR) {
2573 B.setInsertPt(*
MBB,
MI.getIterator());
2581 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
2582 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
2596 Register SrcReg0 =
MI.getOperand(1).getReg();
2597 Register SrcReg1 =
MI.getOperand(2).getReg();
2600 assert(
MRI.getType(DstReg) ==
S64 &&
"This is a special case for s_mul_u64 "
2601 "that handles only 64-bit operands.");
2607 if (DstBank == &AMDGPU::SGPRRegBank) {
2608 MI.setDesc(
TII->get(AMDGPU::S_MUL_U64));
2609 MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
2610 MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
2611 MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
2617 assert(
MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
2618 "The destination operand should be in vector registers.");
2621 Register Op0L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2622 MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
2624 B.buildTrunc(Op0L, SrcReg0);
2627 Register Op1L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2628 MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
2630 B.buildTrunc(Op1L, SrcReg1);
2632 unsigned NewOpc =
Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
2633 ? AMDGPU::G_AMDGPU_MAD_U64_U32
2634 : AMDGPU::G_AMDGPU_MAD_I64_I32;
2638 MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
2639 Register CarryOut =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2640 MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
2641 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
2642 MI.eraseFromParent();
2645 case AMDGPU::G_SEXT_INREG: {
2647 if (SrcRegs.
empty())
2651 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2658 int Amt =
MI.getOperand(2).getImm();
2664 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2666 auto Freeze =
B.buildFreeze(
S32, SrcRegs[0]);
2668 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2671 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(
S32, 31));
2675 B.buildCopy(DstRegs[0], SrcRegs[0]);
2676 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2680 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2681 MI.eraseFromParent();
2684 case AMDGPU::G_CTPOP:
2685 case AMDGPU::G_BITREVERSE: {
2688 if (DstBank == &AMDGPU::SGPRRegBank)
2693 LLT Ty =
MRI.getType(SrcReg);
2697 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2706 case AMDGPU::G_AMDGPU_FFBH_U32:
2707 case AMDGPU::G_AMDGPU_FFBL_B32:
2708 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2709 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2712 if (DstBank == &AMDGPU::SGPRRegBank)
2717 LLT Ty =
MRI.getType(SrcReg);
2727 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2729 unsigned NewOpc =
Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2730 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2731 :
Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2732 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2734 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2735 auto X =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[Idx]});
2736 auto Y =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[Idx ^ 1]});
2738 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF ||
Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2740 : AMDGPU::G_UADDSAT;
2741 Y =
B.buildInstr(AddOpc, {
S32}, {
Y,
B.buildConstant(
S32, 32)});
2743 B.buildUMin(DstReg,
X,
Y);
2744 MI.eraseFromParent();
2747 case AMDGPU::G_SEXT:
2748 case AMDGPU::G_ZEXT:
2749 case AMDGPU::G_ANYEXT: {
2751 LLT SrcTy =
MRI.getType(SrcReg);
2752 const bool Signed =
Opc == AMDGPU::G_SEXT;
2760 LLT DstTy =
MRI.getType(DstReg);
2762 SrcBank != &AMDGPU::SGPRRegBank &&
2763 SrcBank != &AMDGPU::VCCRegBank &&
2767 SrcTy.getSizeInBits() <= 32) {
2773 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2774 }
else if (
Opc == AMDGPU::G_ZEXT) {
2775 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2777 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2781 MRI.setRegBank(DstReg, *SrcBank);
2782 MI.eraseFromParent();
2792 if (SrcBank == &AMDGPU::VCCRegBank) {
2799 const bool UseSel64 = DstSize > 32 &&
2800 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2804 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2805 auto False =
B.buildConstant(SelType, 0);
2807 MRI.setRegBank(True.getReg(0), *DstBank);
2808 MRI.setRegBank(False.getReg(0), *DstBank);
2809 MRI.setRegBank(DstReg, *DstBank);
2812 B.buildSelect(DefRegs[0], SrcReg, True, False);
2814 }
else if (DstSize < 32) {
2815 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2816 MRI.setRegBank(Sel.getReg(0), *DstBank);
2817 B.buildTrunc(DstReg, Sel);
2819 B.buildSelect(DstReg, SrcReg, True, False);
2822 MI.eraseFromParent();
2828 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2837 LLT DstTy =
MRI.getType(DstReg);
2838 LLT SrcTy =
MRI.getType(SrcReg);
2840 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2852 unsigned ConstOffset;
2853 std::tie(BaseIdxReg, ConstOffset) =
2860 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2862 ConstOffset < SrcTy.getNumElements();
2865 if (ShouldMoveIndexIntoLoop)
2866 MI.getOperand(2).setReg(BaseIdxReg);
2872 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2873 SrcBank == &AMDGPU::SGPRRegBank;
2874 if (DstRegs.
empty()) {
2879 if (NeedCopyToVGPR) {
2881 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2882 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2883 MI.getOperand(0).setReg(TmpReg);
2884 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2891 if (ShouldMoveIndexIntoLoop)
2901 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2902 auto One =
B.buildConstant(
S32, 1);
2913 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2914 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2916 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2917 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2919 MRI.setRegBank(DstReg, *DstBank);
2920 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2921 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2922 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2923 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2927 MI.eraseFromParent();
2933 B.setInstr(*Span.
begin());
2934 MI.eraseFromParent();
2938 if (NeedCopyToVGPR) {
2942 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2943 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2945 Extract0->getOperand(0).setReg(TmpReg0);
2946 Extract1->getOperand(0).setReg(TmpReg1);
2954 if (ShouldMoveIndexIntoLoop)
2959 case AMDGPU::G_INSERT_VECTOR_ELT: {
2963 LLT VecTy =
MRI.getType(DstReg);
2969 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2971 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2979 LLT InsTy =
MRI.getType(InsReg);
2983 unsigned ConstOffset;
2984 std::tie(BaseIdxReg, ConstOffset) =
2991 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2996 if (ShouldMoveIndexIntoLoop)
2997 MI.getOperand(3).setReg(BaseIdxReg);
3000 if (InsRegs.
empty()) {
3004 if (ShouldMoveIndexIntoLoop) {
3016 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
3017 auto One =
B.buildConstant(
S32, 1);
3026 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
3027 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
3029 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
3030 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3039 MRI.setRegBank(InsReg, *InsSrcBank);
3040 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
3041 MRI.setRegBank(InsLo.getReg(0), *DstBank);
3042 MRI.setRegBank(InsHi.getReg(0), *DstBank);
3043 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3044 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3045 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3050 B.setInsertPt(
B.getMBB(),
MI);
3051 B.buildBitcast(DstReg, InsHi);
3052 MI.eraseFromParent();
3056 B.setInstr(*Span.
begin());
3057 MI.eraseFromParent();
3068 B.buildBitcast(DstReg, InsHi);
3071 if (ShouldMoveIndexIntoLoop)
3076 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3077 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3078 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3079 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3080 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3081 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3082 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3083 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3084 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3085 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
3086 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3087 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3088 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3089 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3090 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3091 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3092 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3093 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3094 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3095 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3096 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3097 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3102 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3103 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3104 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3105 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3106 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3107 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3108 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3109 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3110 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3111 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3112 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3113 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
3114 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3115 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3116 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3121 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3126 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3127 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3128 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3129 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3130 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3134 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
3138 case AMDGPU::G_INTRINSIC:
3139 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3141 case Intrinsic::amdgcn_readlane: {
3152 case Intrinsic::amdgcn_writelane: {
3162 case Intrinsic::amdgcn_interp_p1:
3163 case Intrinsic::amdgcn_interp_p2:
3164 case Intrinsic::amdgcn_interp_mov:
3165 case Intrinsic::amdgcn_interp_p1_f16:
3166 case Intrinsic::amdgcn_interp_p2_f16:
3167 case Intrinsic::amdgcn_lds_param_load: {
3175 case Intrinsic::amdgcn_interp_inreg_p10:
3176 case Intrinsic::amdgcn_interp_inreg_p2:
3177 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3178 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3179 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3180 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3181 case Intrinsic::amdgcn_permlane16_swap:
3182 case Intrinsic::amdgcn_permlane32_swap:
3185 case Intrinsic::amdgcn_permlane16:
3186 case Intrinsic::amdgcn_permlanex16: {
3194 case Intrinsic::amdgcn_permlane_bcast:
3195 case Intrinsic::amdgcn_permlane_up:
3196 case Intrinsic::amdgcn_permlane_down:
3197 case Intrinsic::amdgcn_permlane_xor:
3202 case Intrinsic::amdgcn_permlane_idx_gen: {
3206 case Intrinsic::amdgcn_sbfe:
3209 case Intrinsic::amdgcn_ubfe:
3212 case Intrinsic::amdgcn_inverse_ballot:
3213 case Intrinsic::amdgcn_s_bitreplicate:
3214 case Intrinsic::amdgcn_s_quadmask:
3215 case Intrinsic::amdgcn_s_wqm:
3219 case Intrinsic::amdgcn_ballot:
3225 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3226 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3227 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
3228 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3229 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3239 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3240 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
3241 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3243 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
3244 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
3245 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
3246 unsigned LastRegOpIdx =
MI.getNumExplicitOperands() - 1 - NumMods;
3251 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3252 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3255 case Intrinsic::amdgcn_ds_ordered_add:
3256 case Intrinsic::amdgcn_ds_ordered_swap: {
3263 case Intrinsic::amdgcn_ds_gws_init:
3264 case Intrinsic::amdgcn_ds_gws_barrier:
3265 case Intrinsic::amdgcn_ds_gws_sema_br: {
3271 case Intrinsic::amdgcn_ds_gws_sema_v:
3272 case Intrinsic::amdgcn_ds_gws_sema_p:
3273 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3278 case Intrinsic::amdgcn_ds_append:
3279 case Intrinsic::amdgcn_ds_consume: {
3283 case Intrinsic::amdgcn_s_sendmsg:
3284 case Intrinsic::amdgcn_s_sendmsghalt: {
3289 case Intrinsic::amdgcn_s_setreg: {
3293 case Intrinsic::amdgcn_s_ttracedata:
3296 case Intrinsic::amdgcn_raw_buffer_load_lds:
3297 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3304 case Intrinsic::amdgcn_struct_buffer_load_lds:
3305 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3312 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:
3313 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
3314 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
3315 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
3320 case Intrinsic::amdgcn_load_to_lds:
3321 case Intrinsic::amdgcn_global_load_lds: {
3326 case Intrinsic::amdgcn_lds_direct_load: {
3332 case Intrinsic::amdgcn_exp_row:
3336 case Intrinsic::amdgcn_cluster_load_b32:
3337 case Intrinsic::amdgcn_cluster_load_b64:
3338 case Intrinsic::amdgcn_cluster_load_b128: {
3343 case Intrinsic::amdgcn_s_sleep_var:
3347 case Intrinsic::amdgcn_s_barrier_join:
3350 case Intrinsic::amdgcn_s_barrier_init:
3351 case Intrinsic::amdgcn_s_barrier_signal_var:
3355 case Intrinsic::amdgcn_s_get_barrier_state:
3356 case Intrinsic::amdgcn_s_get_named_barrier_state: {
3360 case Intrinsic::amdgcn_s_prefetch_data: {
3362 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3367 MI.eraseFromParent();
3370 case Intrinsic::amdgcn_tensor_load_to_lds:
3371 case Intrinsic::amdgcn_tensor_store_from_lds: {
3378 case Intrinsic::amdgcn_tensor_load_to_lds_d2:
3379 case Intrinsic::amdgcn_tensor_store_from_lds_d2: {
3390 if (RSrcIntrin->IsImage) {
3401 case AMDGPU::G_SI_CALL: {
3412 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3413 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3419 unsigned NonCopyInstrsLen = 0;
3425 while (Start->getOpcode() != FrameSetupOpcode) {
3427 bool IsCopy =
false;
3428 if (Start->getOpcode() == AMDGPU::COPY) {
3429 auto &Dst = Start->getOperand(0);
3432 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3437 auto &Src = Start->getOperand(1);
3440 IsCopy = Info->getScratchRSrcReg() == Reg;
3448 NonCopyInstrsLen = NonCopyInstrs.
size();
3453 NonCopyInstrs.
resize(NonCopyInstrsLen);
3455 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3456 MBB->splice(LastCopy,
MBB, NonCopy->getIterator());
3461 NonCopyInstrs.
clear();
3462 NonCopyInstrsLen = 0;
3465 while (End->getOpcode() != FrameDestroyOpcode) {
3467 bool IsCopy =
false;
3468 if (End->getOpcode() == AMDGPU::COPY) {
3469 auto &Src = End->getOperand(1);
3472 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3478 NonCopyInstrsLen = NonCopyInstrs.
size();
3483 NonCopyInstrs.
resize(NonCopyInstrsLen);
3487 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3488 MBB->splice(LastCopy,
MBB, NonCopy->getIterator());
3492 B.setInsertPt(
B.getMBB(), Start);
3496 case AMDGPU::G_LOAD:
3497 case AMDGPU::G_ZEXTLOAD:
3498 case AMDGPU::G_SEXTLOAD: {
3503 case AMDGPU::G_DYN_STACKALLOC:
3506 case AMDGPU::G_STACKRESTORE: {
3511 case AMDGPU::G_SBFX:
3514 case AMDGPU::G_UBFX:
3517 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3518 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3521 case AMDGPU::G_PREFETCH: {
3523 MI.eraseFromParent();
3528 if (PtrBank == AMDGPU::VGPRRegBankID &&
3529 (!
Subtarget.hasVmemPrefInsts() || !
MI.getOperand(3).getImm())) {
3531 MI.eraseFromParent();
3534 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3539 !
MI.getOperand(3).getImm() ))) {
3540 MI.eraseFromParent();
3837 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3852 DstBank = &AMDGPU::VCCRegBank;
3855 DstBank = &AMDGPU::VCCRegBank;
3866 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3871 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3873 OpdsMapping[0] = &ValMap;
3874 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3875 OpdsMapping[1] = &ValMap;
3882 if (
MI.isRegSequence()) {
3885 unsigned BankID = AMDGPU::SGPRRegBankID;
3887 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
3891 if (OpBank != AMDGPU::SGPRRegBankID) {
3892 BankID = AMDGPU::VGPRRegBankID;
3909 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3914 ResultBank = DstBank->
getID();
3916 for (
unsigned I = 0;
I <
PHI->getNumIncomingValues(); ++
I) {
3921 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3922 ResultBank = AMDGPU::VGPRRegBankID;
3927 unsigned OpBank = Bank->
getID();
3931 assert(ResultBank != AMDGPU::InvalidRegBankID);
3933 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3948 switch (
MI.getOpcode()) {
3955 case AMDGPU::G_MUL: {
3956 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3961 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3962 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3963 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3965 TargetBankID = DstBank->
getID();
3966 if (DstBank == &AMDGPU::VCCRegBank) {
3967 TargetBankID = AMDGPU::VCCRegBankID;
3968 BankLHS = AMDGPU::VCCRegBankID;
3969 BankRHS = AMDGPU::VCCRegBankID;
3972 AMDGPU::SGPRRegBankID);
3974 AMDGPU::SGPRRegBankID);
3978 AMDGPU::VCCRegBankID);
3980 AMDGPU::VCCRegBankID);
3983 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3984 TargetBankID = AMDGPU::VGPRRegBankID;
3985 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3986 TargetBankID = AMDGPU::VCCRegBankID;
3987 BankLHS = AMDGPU::VCCRegBankID;
3988 BankRHS = AMDGPU::VCCRegBankID;
3989 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3990 TargetBankID = AMDGPU::SGPRRegBankID;
3994 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3995 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3996 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
4003 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
4004 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
4006 if (
MI.getOpcode() == AMDGPU::G_MUL &&
Subtarget.hasVectorMulU64())
4007 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4010 getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
4012 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
4015 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
4023 case AMDGPU::G_PTR_ADD:
4024 case AMDGPU::G_PTRMASK:
4028 case AMDGPU::G_LSHR:
4029 case AMDGPU::G_ASHR:
4030 case AMDGPU::G_UADDO:
4031 case AMDGPU::G_USUBO:
4032 case AMDGPU::G_UADDE:
4033 case AMDGPU::G_SADDE:
4034 case AMDGPU::G_USUBE:
4035 case AMDGPU::G_SSUBE:
4037 case AMDGPU::G_SHUFFLE_VECTOR:
4038 case AMDGPU::G_SBFX:
4039 case AMDGPU::G_UBFX:
4040 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
4041 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
4045 case AMDGPU::G_SMIN:
4046 case AMDGPU::G_SMAX:
4047 case AMDGPU::G_UMIN:
4048 case AMDGPU::G_UMAX:
4051 if (
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 64 &&
4057 case AMDGPU::G_FADD:
4058 case AMDGPU::G_FSUB:
4059 case AMDGPU::G_FMUL:
4061 case AMDGPU::G_FFLOOR:
4062 case AMDGPU::G_FCEIL:
4063 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
4064 case AMDGPU::G_FMINNUM:
4065 case AMDGPU::G_FMAXNUM:
4066 case AMDGPU::G_FMINIMUM:
4067 case AMDGPU::G_FMAXIMUM:
4068 case AMDGPU::G_FMINIMUMNUM:
4069 case AMDGPU::G_FMAXIMUMNUM:
4070 case AMDGPU::G_INTRINSIC_TRUNC:
4071 case AMDGPU::G_STRICT_FADD:
4072 case AMDGPU::G_STRICT_FSUB:
4073 case AMDGPU::G_STRICT_FMUL:
4074 case AMDGPU::G_STRICT_FMA: {
4075 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
4076 unsigned Size = Ty.getSizeInBits();
4077 if (
Subtarget.hasSALUFloatInsts() && Ty.isScalar() &&
4082 case AMDGPU::G_FPTOSI:
4083 case AMDGPU::G_FPTOUI:
4084 case AMDGPU::G_SITOFP:
4085 case AMDGPU::G_UITOFP: {
4086 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4087 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4088 if (
Subtarget.hasSALUFloatInsts() && SizeDst == 32 && SizeSrc == 32 &&
4093 case AMDGPU::G_FPTRUNC:
4094 case AMDGPU::G_FPEXT: {
4095 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4096 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4097 if (
Subtarget.hasSALUFloatInsts() && SizeDst != 64 && SizeSrc != 64 &&
4102 case AMDGPU::G_FSQRT:
4103 case AMDGPU::G_FEXP2:
4104 case AMDGPU::G_FLOG2: {
4105 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4111 case AMDGPU::G_SADDSAT:
4112 case AMDGPU::G_SSUBSAT:
4113 case AMDGPU::G_UADDSAT:
4114 case AMDGPU::G_USUBSAT:
4115 case AMDGPU::G_FMAD:
4116 case AMDGPU::G_FLDEXP:
4117 case AMDGPU::G_FMINNUM_IEEE:
4118 case AMDGPU::G_FMAXNUM_IEEE:
4119 case AMDGPU::G_FCANONICALIZE:
4120 case AMDGPU::G_STRICT_FLDEXP:
4121 case AMDGPU::G_BSWAP:
4122 case AMDGPU::G_FSHR:
4123 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
4124 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
4125 case AMDGPU::G_AMDGPU_RCP_IFLAG:
4126 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
4127 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
4128 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
4129 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
4130 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
4131 case AMDGPU::G_AMDGPU_SMED3:
4132 case AMDGPU::G_AMDGPU_FMED3:
4134 case AMDGPU::G_UMULH:
4135 case AMDGPU::G_SMULH: {
4140 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4141 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4150 bool AllSalu =
true;
4151 bool MulSalu =
true;
4152 for (
unsigned i = 0; i < 5; ++i) {
4155 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4157 if (i == 2 || i == 3) {
4171 if (!MulSalu ||
Subtarget.hasFullRate64Ops())
4175 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4176 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4177 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4178 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4179 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4182 case AMDGPU::G_IMPLICIT_DEF: {
4183 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4184 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4187 case AMDGPU::G_FCONSTANT:
4188 case AMDGPU::G_CONSTANT:
4189 case AMDGPU::G_GLOBAL_VALUE:
4190 case AMDGPU::G_FRAME_INDEX:
4191 case AMDGPU::G_BLOCK_ADDR:
4192 case AMDGPU::G_READSTEADYCOUNTER:
4193 case AMDGPU::G_READCYCLECOUNTER: {
4194 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4195 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4198 case AMDGPU::G_DYN_STACKALLOC: {
4200 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4202 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4205 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4210 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4211 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4214 case AMDGPU::G_INSERT: {
4219 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4220 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4221 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4222 OpdsMapping[3] =
nullptr;
4225 case AMDGPU::G_EXTRACT: {
4229 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4230 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4231 OpdsMapping[2] =
nullptr;
4234 case AMDGPU::G_BUILD_VECTOR:
4235 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4236 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
4239 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4242 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
4244 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4245 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4246 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4252 case AMDGPU::G_MERGE_VALUES:
4253 case AMDGPU::G_CONCAT_VECTORS: {
4255 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4256 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4258 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4260 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
4261 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4264 case AMDGPU::G_BITREVERSE:
4265 case AMDGPU::G_BITCAST:
4266 case AMDGPU::G_INTTOPTR:
4267 case AMDGPU::G_PTRTOINT:
4268 case AMDGPU::G_FABS:
4269 case AMDGPU::G_FNEG: {
4270 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4272 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4275 case AMDGPU::G_AMDGPU_FFBH_U32:
4276 case AMDGPU::G_AMDGPU_FFBL_B32:
4277 case AMDGPU::G_CTLZ_ZERO_UNDEF:
4278 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
4279 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4281 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4282 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
4285 case AMDGPU::G_CTPOP: {
4286 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4288 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4293 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4296 case AMDGPU::G_TRUNC: {
4302 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4303 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4306 case AMDGPU::G_ZEXT:
4307 case AMDGPU::G_SEXT:
4308 case AMDGPU::G_ANYEXT:
4309 case AMDGPU::G_SEXT_INREG: {
4318 switch (SrcBank->
getID()) {
4319 case AMDGPU::SGPRRegBankID:
4320 DstBank = AMDGPU::SGPRRegBankID;
4323 DstBank = AMDGPU::VGPRRegBankID;
4329 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4330 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
4334 case AMDGPU::G_IS_FPCLASS: {
4336 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4337 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4338 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4339 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4342 case AMDGPU::G_STORE: {
4344 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4349 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4350 OpdsMapping[0] = ValMapping;
4354 case AMDGPU::G_ICMP:
4355 case AMDGPU::G_FCMP: {
4356 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4361 AMDGPU::SGPRRegBankID);
4365 auto canUseSCCICMP = [&]() {
4368 return Size == 32 ||
4373 auto canUseSCCFCMP = [&]() {
4377 bool isICMP =
MI.getOpcode() == AMDGPU::G_ICMP;
4378 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4379 Op2Bank == AMDGPU::SGPRRegBankID &&
4380 Op3Bank == AMDGPU::SGPRRegBankID &&
4381 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4383 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4384 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4388 const unsigned ResultSize = 1;
4390 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4391 OpdsMapping[1] =
nullptr;
4392 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4393 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4396 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4399 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4400 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4401 unsigned IdxSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4403 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4405 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4406 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4409 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4412 case AMDGPU::G_INSERT_VECTOR_ELT: {
4414 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4416 unsigned VecSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4417 unsigned InsertSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4418 unsigned IdxSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4422 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4423 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4427 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4428 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4431 assert(InsertSize == 32 || InsertSize == 64);
4432 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4436 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4439 case AMDGPU::G_UNMERGE_VALUES: {
4444 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4446 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4450 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4451 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4452 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4453 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4454 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4455 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4456 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4457 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4458 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4459 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
4460 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4461 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4462 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4463 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4464 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4465 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4466 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4467 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4468 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4469 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4470 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4471 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4490 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4491 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4492 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4493 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4494 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4495 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4496 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4497 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4498 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4499 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4500 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4501 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4502 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4503 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4504 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4527 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4553 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4554 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4555 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4556 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4557 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4565 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4566 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4567 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4569 unsigned Size0 =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4570 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4573 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
4577 case AMDGPU::G_INTRINSIC:
4578 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4582 case Intrinsic::amdgcn_div_fmas:
4583 case Intrinsic::amdgcn_div_fixup:
4584 case Intrinsic::amdgcn_trig_preop:
4585 case Intrinsic::amdgcn_sin:
4586 case Intrinsic::amdgcn_cos:
4587 case Intrinsic::amdgcn_log_clamp:
4588 case Intrinsic::amdgcn_rcp_legacy:
4589 case Intrinsic::amdgcn_rsq_legacy:
4590 case Intrinsic::amdgcn_rsq_clamp:
4591 case Intrinsic::amdgcn_tanh:
4592 case Intrinsic::amdgcn_fmul_legacy:
4593 case Intrinsic::amdgcn_fma_legacy:
4594 case Intrinsic::amdgcn_frexp_mant:
4595 case Intrinsic::amdgcn_frexp_exp:
4596 case Intrinsic::amdgcn_fract:
4597 case Intrinsic::amdgcn_cvt_pknorm_i16:
4598 case Intrinsic::amdgcn_cvt_pknorm_u16:
4599 case Intrinsic::amdgcn_cvt_pk_i16:
4600 case Intrinsic::amdgcn_cvt_pk_u16:
4601 case Intrinsic::amdgcn_cvt_sr_pk_f16_f32:
4602 case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
4603 case Intrinsic::amdgcn_cvt_pk_f16_fp8:
4604 case Intrinsic::amdgcn_cvt_pk_f16_bf8:
4605 case Intrinsic::amdgcn_cvt_pk_fp8_f16:
4606 case Intrinsic::amdgcn_cvt_pk_bf8_f16:
4607 case Intrinsic::amdgcn_cvt_sr_fp8_f16:
4608 case Intrinsic::amdgcn_cvt_sr_bf8_f16:
4609 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp8:
4610 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp8:
4611 case Intrinsic::amdgcn_cvt_scale_pk8_f16_bf8:
4612 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_bf8:
4613 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp4:
4614 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp4:
4615 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:
4616 case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:
4617 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:
4618 case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:
4619 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:
4620 case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:
4621 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:
4622 case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:
4623 case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:
4624 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:
4625 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:
4626 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:
4627 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16:
4628 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32:
4629 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32:
4630 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32:
4631 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16:
4632 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16:
4633 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f32:
4634 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f32:
4635 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f16:
4636 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f16:
4637 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_bf16:
4638 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_bf16:
4639 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_bf16:
4640 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_bf16:
4641 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f16:
4642 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f16:
4643 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f32:
4644 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f32:
4645 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f32:
4646 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f16:
4647 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_bf16:
4648 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f32:
4649 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f32:
4650 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f16:
4651 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f16:
4652 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_bf16:
4653 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:
4654 case Intrinsic::amdgcn_sat_pk4_i4_i8:
4655 case Intrinsic::amdgcn_sat_pk4_u4_u8:
4656 case Intrinsic::amdgcn_fmed3:
4657 case Intrinsic::amdgcn_cubeid:
4658 case Intrinsic::amdgcn_cubema:
4659 case Intrinsic::amdgcn_cubesc:
4660 case Intrinsic::amdgcn_cubetc:
4661 case Intrinsic::amdgcn_sffbh:
4662 case Intrinsic::amdgcn_fmad_ftz:
4663 case Intrinsic::amdgcn_mbcnt_lo:
4664 case Intrinsic::amdgcn_mbcnt_hi:
4665 case Intrinsic::amdgcn_mul_u24:
4666 case Intrinsic::amdgcn_mul_i24:
4667 case Intrinsic::amdgcn_mulhi_u24:
4668 case Intrinsic::amdgcn_mulhi_i24:
4669 case Intrinsic::amdgcn_lerp:
4670 case Intrinsic::amdgcn_sad_u8:
4671 case Intrinsic::amdgcn_msad_u8:
4672 case Intrinsic::amdgcn_sad_hi_u8:
4673 case Intrinsic::amdgcn_sad_u16:
4674 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4675 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4676 case Intrinsic::amdgcn_mqsad_u32_u8:
4677 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4678 case Intrinsic::amdgcn_alignbyte:
4679 case Intrinsic::amdgcn_perm:
4680 case Intrinsic::amdgcn_prng_b32:
4681 case Intrinsic::amdgcn_fdot2:
4682 case Intrinsic::amdgcn_sdot2:
4683 case Intrinsic::amdgcn_udot2:
4684 case Intrinsic::amdgcn_sdot4:
4685 case Intrinsic::amdgcn_udot4:
4686 case Intrinsic::amdgcn_sdot8:
4687 case Intrinsic::amdgcn_udot8:
4688 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4689 case Intrinsic::amdgcn_fdot2_f16_f16:
4690 case Intrinsic::amdgcn_fdot2_f32_bf16:
4691 case Intrinsic::amdgcn_fdot2c_f32_bf16:
4692 case Intrinsic::amdgcn_sudot4:
4693 case Intrinsic::amdgcn_sudot8:
4694 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4695 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4696 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4697 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4698 case Intrinsic::amdgcn_cvt_f32_fp8:
4699 case Intrinsic::amdgcn_cvt_f32_fp8_e5m3:
4700 case Intrinsic::amdgcn_cvt_f32_bf8:
4701 case Intrinsic::amdgcn_cvt_off_f32_i4:
4702 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4703 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4704 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4705 case Intrinsic::amdgcn_cvt_pk_fp8_f32_e5m3:
4706 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4707 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4708 case Intrinsic::amdgcn_cvt_sr_fp8_f32_e5m3:
4709 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4710 case Intrinsic::amdgcn_cvt_sr_bf16_f32:
4711 case Intrinsic::amdgcn_cvt_sr_f16_f32:
4712 case Intrinsic::amdgcn_cvt_f16_fp8:
4713 case Intrinsic::amdgcn_cvt_f16_bf8:
4714 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
4715 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
4716 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:
4717 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16:
4718 case Intrinsic::amdgcn_cvt_scalef32_f16_fp8:
4719 case Intrinsic::amdgcn_cvt_scalef32_f16_bf8:
4720 case Intrinsic::amdgcn_cvt_scalef32_f32_fp8:
4721 case Intrinsic::amdgcn_cvt_scalef32_f32_bf8:
4722 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f32:
4723 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f32:
4724 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp8:
4725 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_bf8:
4726 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f16:
4727 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_bf16:
4728 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f16:
4729 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_bf16:
4730 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp4:
4731 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f32:
4732 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp4:
4733 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp4:
4734 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_fp6:
4735 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_bf6:
4736 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_bf6:
4737 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_bf6:
4738 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_fp6:
4739 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_fp6:
4740 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_bf8:
4741 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_bf8:
4742 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp8:
4743 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp8:
4744 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f16:
4745 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_bf16:
4746 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
4747 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
4748 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
4749 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
4750 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
4751 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
4752 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
4753 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
4754 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
4755 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_bf16:
4756 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f16:
4757 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f32:
4758 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_bf16:
4759 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f16:
4760 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f32:
4761 case Intrinsic::amdgcn_ashr_pk_i8_i32:
4762 case Intrinsic::amdgcn_ashr_pk_u8_i32:
4763 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:
4764 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32:
4765 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4766 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4767 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4768 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4769 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4770 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4771 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4772 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4773 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4774 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4775 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4776 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4777 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4778 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4779 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4780 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4781 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4782 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4783 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4784 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4785 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4786 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4787 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4788 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4789 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4790 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4791 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4792 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4793 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4794 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
4795 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8:
4796 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8:
4797 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8:
4798 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8:
4799 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8:
4800 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8:
4801 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8:
4802 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8:
4803 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8:
4804 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8:
4805 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8:
4806 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8:
4807 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8:
4808 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8:
4809 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8:
4810 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8:
4811 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
4812 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
4813 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
4814 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
4815 case Intrinsic::amdgcn_wmma_f32_32x16x128_f4:
4816 case Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4:
4817 case Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4:
4818 case Intrinsic::amdgcn_swmmac_f16_16x16x64_f16:
4819 case Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16:
4820 case Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16:
4821 case Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16:
4822 case Intrinsic::amdgcn_swmmac_f32_16x16x64_f16:
4823 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8:
4824 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8:
4825 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8:
4826 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8:
4827 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8:
4828 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8:
4829 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8:
4830 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8:
4831 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
4832 case Intrinsic::amdgcn_perm_pk16_b4_u4:
4833 case Intrinsic::amdgcn_perm_pk16_b6_u4:
4834 case Intrinsic::amdgcn_perm_pk16_b8_u4:
4835 case Intrinsic::amdgcn_add_max_i32:
4836 case Intrinsic::amdgcn_add_max_u32:
4837 case Intrinsic::amdgcn_add_min_i32:
4838 case Intrinsic::amdgcn_add_min_u32:
4839 case Intrinsic::amdgcn_pk_add_max_i16:
4840 case Intrinsic::amdgcn_pk_add_max_u16:
4841 case Intrinsic::amdgcn_pk_add_min_i16:
4842 case Intrinsic::amdgcn_pk_add_min_u16:
4844 case Intrinsic::amdgcn_log:
4845 case Intrinsic::amdgcn_exp2:
4846 case Intrinsic::amdgcn_rcp:
4847 case Intrinsic::amdgcn_rsq:
4848 case Intrinsic::amdgcn_sqrt: {
4849 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4855 case Intrinsic::amdgcn_sbfe:
4856 case Intrinsic::amdgcn_ubfe:
4860 case Intrinsic::amdgcn_ds_swizzle:
4861 case Intrinsic::amdgcn_ds_permute:
4862 case Intrinsic::amdgcn_ds_bpermute:
4863 case Intrinsic::amdgcn_update_dpp:
4864 case Intrinsic::amdgcn_mov_dpp8:
4865 case Intrinsic::amdgcn_mov_dpp:
4866 case Intrinsic::amdgcn_strict_wwm:
4867 case Intrinsic::amdgcn_wwm:
4868 case Intrinsic::amdgcn_strict_wqm:
4869 case Intrinsic::amdgcn_wqm:
4870 case Intrinsic::amdgcn_softwqm:
4871 case Intrinsic::amdgcn_set_inactive:
4872 case Intrinsic::amdgcn_set_inactive_chain_arg:
4873 case Intrinsic::amdgcn_permlane64:
4874 case Intrinsic::amdgcn_ds_bpermute_fi_b32:
4876 case Intrinsic::amdgcn_cvt_pkrtz:
4880 case Intrinsic::amdgcn_kernarg_segment_ptr:
4881 case Intrinsic::amdgcn_s_getpc:
4882 case Intrinsic::amdgcn_groupstaticsize:
4883 case Intrinsic::amdgcn_reloc_constant:
4884 case Intrinsic::returnaddress: {
4885 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4886 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4889 case Intrinsic::amdgcn_wqm_vote: {
4890 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4891 OpdsMapping[0] = OpdsMapping[2]
4892 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4895 case Intrinsic::amdgcn_ps_live: {
4896 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4899 case Intrinsic::amdgcn_div_scale: {
4900 unsigned Dst0Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4901 unsigned Dst1Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4902 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4903 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4905 unsigned SrcSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4906 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4907 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4910 case Intrinsic::amdgcn_class: {
4911 Register Src0Reg =
MI.getOperand(2).getReg();
4912 Register Src1Reg =
MI.getOperand(3).getReg();
4913 unsigned Src0Size =
MRI.getType(Src0Reg).getSizeInBits();
4914 unsigned Src1Size =
MRI.getType(Src1Reg).getSizeInBits();
4915 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4916 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4917 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4918 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4921 case Intrinsic::amdgcn_icmp:
4922 case Intrinsic::amdgcn_fcmp: {
4923 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4925 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4926 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4927 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4928 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4931 case Intrinsic::amdgcn_readlane: {
4934 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4936 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4939 case Intrinsic::amdgcn_readfirstlane: {
4940 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4941 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4942 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4943 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4946 case Intrinsic::amdgcn_writelane: {
4947 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4949 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4952 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4954 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4958 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4959 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4960 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4963 case Intrinsic::amdgcn_if_break: {
4965 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4966 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4967 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4970 case Intrinsic::amdgcn_permlane16:
4971 case Intrinsic::amdgcn_permlanex16: {
4973 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4974 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4975 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4980 case Intrinsic::amdgcn_permlane_bcast:
4981 case Intrinsic::amdgcn_permlane_up:
4982 case Intrinsic::amdgcn_permlane_down:
4983 case Intrinsic::amdgcn_permlane_xor: {
4985 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4986 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4991 case Intrinsic::amdgcn_permlane_idx_gen: {
4993 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4994 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4998 case Intrinsic::amdgcn_permlane16_var:
4999 case Intrinsic::amdgcn_permlanex16_var: {
5001 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5002 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5003 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5004 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5007 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
5008 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
5009 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
5010 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
5011 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
5012 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
5013 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
5014 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
5015 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
5016 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
5017 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
5018 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
5019 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
5020 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
5021 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
5022 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
5023 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
5024 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
5025 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
5026 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
5027 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
5028 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
5029 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
5030 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
5031 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
5032 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
5033 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
5034 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
5035 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
5036 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
5037 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
5038 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
5039 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
5040 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
5041 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
5042 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
5043 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
5044 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
5045 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8:
5046 case Intrinsic::amdgcn_mfma_f32_16x16x32_f16:
5047 case Intrinsic::amdgcn_mfma_f32_32x32x16_f16:
5048 case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
5049 case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
5050 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
5051 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5052 unsigned MinNumRegsRequired = DstSize / 32;
5062 bool UseAGPRForm = !
Subtarget.hasGFX90AInsts() ||
5063 Info->selectAGPRFormMFMA(MinNumRegsRequired);
5075 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
5076 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
5077 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5078 unsigned MinNumRegsRequired = DstSize / 32;
5097 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
5098 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
5099 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
5100 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
5101 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
5102 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
5103 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
5104 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
5105 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
5106 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
5107 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
5108 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
5109 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
5110 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
5111 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
5112 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
5113 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
5114 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
5115 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
5116 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
5117 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
5118 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
5119 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
5120 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
5121 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
5122 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
5123 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
5124 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
5126 unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
5127 unsigned MinNumRegsRequired = DstSize / 32;
5143 case Intrinsic::amdgcn_interp_p1:
5144 case Intrinsic::amdgcn_interp_p2:
5145 case Intrinsic::amdgcn_interp_mov:
5146 case Intrinsic::amdgcn_interp_p1_f16:
5147 case Intrinsic::amdgcn_interp_p2_f16:
5148 case Intrinsic::amdgcn_lds_param_load: {
5149 const int M0Idx =
MI.getNumOperands() - 1;
5150 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5152 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5154 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5155 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5156 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5160 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5163 case Intrinsic::amdgcn_interp_inreg_p10:
5164 case Intrinsic::amdgcn_interp_inreg_p2:
5165 case Intrinsic::amdgcn_interp_inreg_p10_f16:
5166 case Intrinsic::amdgcn_interp_inreg_p2_f16:
5167 case Intrinsic::amdgcn_interp_p10_rtz_f16:
5168 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
5169 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5170 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5171 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5172 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5173 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5176 case Intrinsic::amdgcn_permlane16_swap:
5177 case Intrinsic::amdgcn_permlane32_swap: {
5178 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5179 OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] =
5180 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5183 case Intrinsic::amdgcn_ballot: {
5184 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5185 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
5186 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5187 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
5190 case Intrinsic::amdgcn_inverse_ballot: {
5192 Register MaskReg =
MI.getOperand(2).getReg();
5193 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
5194 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
5195 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5196 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5199 case Intrinsic::amdgcn_bitop3: {
5201 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5202 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5203 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5204 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5207 case Intrinsic::amdgcn_s_quadmask:
5208 case Intrinsic::amdgcn_s_wqm: {
5209 Register MaskReg =
MI.getOperand(2).getReg();
5210 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
5211 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
5212 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
5213 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5216 case Intrinsic::amdgcn_wave_reduce_add:
5217 case Intrinsic::amdgcn_wave_reduce_fadd:
5218 case Intrinsic::amdgcn_wave_reduce_sub:
5219 case Intrinsic::amdgcn_wave_reduce_fsub:
5220 case Intrinsic::amdgcn_wave_reduce_min:
5221 case Intrinsic::amdgcn_wave_reduce_umin:
5222 case Intrinsic::amdgcn_wave_reduce_fmin:
5223 case Intrinsic::amdgcn_wave_reduce_max:
5224 case Intrinsic::amdgcn_wave_reduce_umax:
5225 case Intrinsic::amdgcn_wave_reduce_fmax:
5226 case Intrinsic::amdgcn_wave_reduce_and:
5227 case Intrinsic::amdgcn_wave_reduce_or:
5228 case Intrinsic::amdgcn_wave_reduce_xor: {
5229 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5230 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5231 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
5234 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
5237 case Intrinsic::amdgcn_s_bitreplicate:
5238 Register MaskReg =
MI.getOperand(2).getReg();
5239 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
5240 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5241 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
5245 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
5246 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
5247 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
5248 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
5249 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
5252 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
5259 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
5260 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
5261 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5263 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
5264 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
5265 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
5266 unsigned LastRegOpIdx =
MI.getNumExplicitOperands() - 1 - NumMods;
5267 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5268 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5270 OpdsMapping[1] = AMDGPU::getValueMapping(
5271 AMDGPU::VGPRRegBankID,
5272 MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits());
5273 OpdsMapping[2] = AMDGPU::getValueMapping(
5274 AMDGPU::VGPRRegBankID,
5275 MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
5277 OpdsMapping[LastRegOpIdx] =
5279 if (LastRegOpIdx == 3) {
5281 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
5284 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5287 unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
5288 for (
unsigned I = FirstSrcOpIdx;
I < LastRegOpIdx; ++
I) {
5289 unsigned Size =
MRI.getType(
MI.getOperand(
I).getReg()).getSizeInBits();
5290 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5295 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
5296 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
5299 case Intrinsic::amdgcn_s_getreg:
5300 case Intrinsic::amdgcn_s_memtime:
5301 case Intrinsic::amdgcn_s_memrealtime:
5302 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
5303 case Intrinsic::amdgcn_s_sendmsg_rtn: {
5304 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5305 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5308 case Intrinsic::amdgcn_global_atomic_csub:
5309 case Intrinsic::amdgcn_global_atomic_fmin_num:
5310 case Intrinsic::amdgcn_global_atomic_fmax_num:
5311 case Intrinsic::amdgcn_flat_atomic_fmin_num:
5312 case Intrinsic::amdgcn_flat_atomic_fmax_num:
5313 case Intrinsic::amdgcn_atomic_cond_sub_u32:
5314 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
5315 case Intrinsic::amdgcn_global_load_tr_b64:
5316 case Intrinsic::amdgcn_global_load_tr_b128:
5317 case Intrinsic::amdgcn_global_load_tr4_b64:
5318 case Intrinsic::amdgcn_global_load_tr6_b96:
5319 case Intrinsic::amdgcn_ds_load_tr8_b64:
5320 case Intrinsic::amdgcn_ds_load_tr16_b128:
5321 case Intrinsic::amdgcn_ds_load_tr4_b64:
5322 case Intrinsic::amdgcn_ds_load_tr6_b96:
5323 case Intrinsic::amdgcn_flat_load_monitor_b32:
5324 case Intrinsic::amdgcn_flat_load_monitor_b64:
5325 case Intrinsic::amdgcn_flat_load_monitor_b128:
5326 case Intrinsic::amdgcn_global_load_monitor_b32:
5327 case Intrinsic::amdgcn_global_load_monitor_b64:
5328 case Intrinsic::amdgcn_global_load_monitor_b128:
5329 case Intrinsic::amdgcn_ds_read_tr4_b64:
5330 case Intrinsic::amdgcn_ds_read_tr6_b96:
5331 case Intrinsic::amdgcn_ds_read_tr8_b64:
5332 case Intrinsic::amdgcn_ds_read_tr16_b64:
5333 case Intrinsic::amdgcn_ds_atomic_async_barrier_arrive_b64:
5334 case Intrinsic::amdgcn_ds_atomic_barrier_arrive_rtn_b64:
5336 case Intrinsic::amdgcn_ds_ordered_add:
5337 case Intrinsic::amdgcn_ds_ordered_swap: {
5338 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5339 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5341 AMDGPU::SGPRRegBankID);
5342 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
5343 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5346 case Intrinsic::amdgcn_ds_append:
5347 case Intrinsic::amdgcn_ds_consume: {
5348 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5349 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5353 case Intrinsic::amdgcn_exp_compr:
5354 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5355 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5357 case Intrinsic::amdgcn_exp:
5359 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5360 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5361 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5362 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5364 case Intrinsic::amdgcn_exp_row:
5365 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5366 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5367 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5368 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5371 case Intrinsic::amdgcn_s_sendmsg:
5372 case Intrinsic::amdgcn_s_sendmsghalt: {
5375 AMDGPU::SGPRRegBankID);
5376 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5379 case Intrinsic::amdgcn_s_setreg: {
5382 AMDGPU::SGPRRegBankID);
5383 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5386 case Intrinsic::amdgcn_s_ttracedata: {
5390 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5393 case Intrinsic::amdgcn_end_cf: {
5395 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5398 case Intrinsic::amdgcn_else: {
5400 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5401 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5402 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5405 case Intrinsic::amdgcn_init_whole_wave:
5406 case Intrinsic::amdgcn_live_mask: {
5407 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5410 case Intrinsic::amdgcn_wqm_demote:
5411 case Intrinsic::amdgcn_kill: {
5412 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5415 case Intrinsic::amdgcn_raw_buffer_load:
5416 case Intrinsic::amdgcn_raw_ptr_buffer_load:
5417 case Intrinsic::amdgcn_raw_atomic_buffer_load:
5418 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
5419 case Intrinsic::amdgcn_raw_tbuffer_load:
5420 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
5429 case Intrinsic::amdgcn_raw_buffer_load_lds:
5430 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
5437 case Intrinsic::amdgcn_raw_buffer_store:
5438 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5439 case Intrinsic::amdgcn_raw_buffer_store_format:
5440 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5441 case Intrinsic::amdgcn_raw_tbuffer_store:
5442 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5449 case Intrinsic::amdgcn_struct_buffer_load:
5450 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5451 case Intrinsic::amdgcn_struct_tbuffer_load:
5452 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5453 case Intrinsic::amdgcn_struct_atomic_buffer_load:
5454 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
5462 case Intrinsic::amdgcn_struct_buffer_load_lds:
5463 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
5471 case Intrinsic::amdgcn_struct_buffer_store:
5472 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5473 case Intrinsic::amdgcn_struct_tbuffer_store:
5474 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5482 case Intrinsic::amdgcn_init_exec_from_input: {
5484 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5487 case Intrinsic::amdgcn_ds_gws_init:
5488 case Intrinsic::amdgcn_ds_gws_barrier:
5489 case Intrinsic::amdgcn_ds_gws_sema_br: {
5490 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5494 AMDGPU::SGPRRegBankID);
5495 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5498 case Intrinsic::amdgcn_ds_gws_sema_v:
5499 case Intrinsic::amdgcn_ds_gws_sema_p:
5500 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5503 AMDGPU::SGPRRegBankID);
5504 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5507 case Intrinsic::amdgcn_cluster_load_b32:
5508 case Intrinsic::amdgcn_cluster_load_b64:
5509 case Intrinsic::amdgcn_cluster_load_b128: {
5514 OpdsMapping[4] = AMDGPU::getValueMapping(M0Bank, 32);
5517 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:
5518 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
5519 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
5520 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
5525 OpdsMapping[5] = AMDGPU::getValueMapping(M0Bank, 32);
5528 case Intrinsic::amdgcn_global_store_async_from_lds_b8:
5529 case Intrinsic::amdgcn_global_store_async_from_lds_b32:
5530 case Intrinsic::amdgcn_global_store_async_from_lds_b64:
5531 case Intrinsic::amdgcn_global_store_async_from_lds_b128:
5532 case Intrinsic::amdgcn_global_load_async_to_lds_b8:
5533 case Intrinsic::amdgcn_global_load_async_to_lds_b32:
5534 case Intrinsic::amdgcn_global_load_async_to_lds_b64:
5535 case Intrinsic::amdgcn_global_load_async_to_lds_b128:
5536 case Intrinsic::amdgcn_load_to_lds:
5537 case Intrinsic::amdgcn_global_load_lds: {
5542 case Intrinsic::amdgcn_lds_direct_load: {
5543 const int M0Idx =
MI.getNumOperands() - 1;
5544 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5546 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5548 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5549 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5550 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5554 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5557 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5558 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5562 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
5563 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
5564 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
5565 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn: {
5578 case Intrinsic::amdgcn_s_sleep_var:
5581 case Intrinsic::amdgcn_s_barrier_join:
5584 case Intrinsic::amdgcn_s_barrier_init:
5585 case Intrinsic::amdgcn_s_barrier_signal_var:
5589 case Intrinsic::amdgcn_s_barrier_signal_isfirst: {
5590 const unsigned ResultSize = 1;
5592 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5595 case Intrinsic::amdgcn_s_get_barrier_state:
5596 case Intrinsic::amdgcn_s_get_named_barrier_state: {
5601 case Intrinsic::amdgcn_pops_exiting_wave_id:
5603 case Intrinsic::amdgcn_tensor_load_to_lds_d2:
5604 case Intrinsic::amdgcn_tensor_store_from_lds_d2:
5605 case Intrinsic::amdgcn_tensor_load_to_lds:
5606 case Intrinsic::amdgcn_tensor_store_from_lds: {
5609 for (
unsigned I = 1;
I <
MI.getNumOperands(); ++
I) {
5610 if (
MI.getOperand(
I).isReg()) {
5614 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5619 case Intrinsic::amdgcn_s_prefetch_data: {
5624 case Intrinsic::amdgcn_flat_prefetch:
5625 case Intrinsic::amdgcn_global_prefetch:
5632 case AMDGPU::G_SELECT: {
5633 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5635 AMDGPU::SGPRRegBankID);
5637 AMDGPU::SGPRRegBankID);
5638 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5639 Op3Bank == AMDGPU::SGPRRegBankID;
5641 unsigned CondBankDefault = SGPRSrcs ?
5642 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5645 if (CondBank == AMDGPU::SGPRRegBankID)
5646 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5647 else if (CondBank == AMDGPU::VGPRRegBankID)
5648 CondBank = AMDGPU::VCCRegBankID;
5650 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5651 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5653 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5657 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5658 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5659 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5660 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5662 OpdsMapping[0] = AMDGPU::getValueMapping(Bank,
Size);
5663 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5664 OpdsMapping[2] = AMDGPU::getValueMapping(Bank,
Size);
5665 OpdsMapping[3] = AMDGPU::getValueMapping(Bank,
Size);
5671 case AMDGPU::G_SI_CALL: {
5672 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5678 for (
unsigned I = 4;
I <
MI.getNumOperands(); ++
I) {
5679 if (
MI.getOperand(
I).isReg()) {
5683 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5688 case AMDGPU::G_LOAD:
5689 case AMDGPU::G_ZEXTLOAD:
5690 case AMDGPU::G_SEXTLOAD:
5693 case AMDGPU::G_ATOMICRMW_XCHG:
5694 case AMDGPU::G_ATOMICRMW_ADD:
5695 case AMDGPU::G_ATOMICRMW_SUB:
5696 case AMDGPU::G_ATOMICRMW_AND:
5697 case AMDGPU::G_ATOMICRMW_OR:
5698 case AMDGPU::G_ATOMICRMW_XOR:
5699 case AMDGPU::G_ATOMICRMW_MAX:
5700 case AMDGPU::G_ATOMICRMW_MIN:
5701 case AMDGPU::G_ATOMICRMW_UMAX:
5702 case AMDGPU::G_ATOMICRMW_UMIN:
5703 case AMDGPU::G_ATOMICRMW_FADD:
5704 case AMDGPU::G_ATOMICRMW_FMIN:
5705 case AMDGPU::G_ATOMICRMW_FMAX:
5706 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5707 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5708 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
5714 case AMDGPU::G_ATOMIC_CMPXCHG: {
5721 case AMDGPU::G_BRCOND: {
5723 AMDGPU::SGPRRegBankID);
5724 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
5725 if (Bank != AMDGPU::SGPRRegBankID)
5726 Bank = AMDGPU::VCCRegBankID;
5728 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
5731 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
5733 case AMDGPU::G_PREFETCH:
5736 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP:
5737 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN:
5738 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5744 MI.getNumOperands());