2208 B.setInstrAndDebugLoc(
MI);
2209 unsigned Opc =
MI.getOpcode();
2212 case AMDGPU::G_CONSTANT:
2213 case AMDGPU::G_IMPLICIT_DEF: {
2221 if (DstBank == &AMDGPU::VCCRegBank)
2224 if (DefRegs.
empty())
2227 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2230 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2232 MI.getOperand(0).setReg(NewDstReg);
2233 if (
Opc != AMDGPU::G_IMPLICIT_DEF) {
2234 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2235 MI.getOperand(1).setCImm(
2240 B.buildTrunc(DefRegs[0], NewDstReg);
2243 case AMDGPU::G_PHI: {
2252 if (DstBank == &AMDGPU::VCCRegBank) {
2259 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
2263 if (SrcBank != &AMDGPU::VCCRegBank) {
2268 MRI.
setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2269 MI.getOperand(
I).setReg(Copy.getReg(0));
2280 ApplyRegBankMapping ApplyBank(
B, *
this, MRI, DstBank);
2281 B.setInsertPt(
B.getMBB(),
MI);
2289 case AMDGPU::G_FCMP:
2293 case AMDGPU::G_ICMP:
2294 case AMDGPU::G_UADDO:
2295 case AMDGPU::G_USUBO:
2296 case AMDGPU::G_UADDE:
2297 case AMDGPU::G_SADDE:
2298 case AMDGPU::G_USUBE:
2299 case AMDGPU::G_SSUBE: {
2300 unsigned BoolDstOp =
2301 (
Opc == AMDGPU::G_ICMP ||
Opc == AMDGPU::G_FCMP) ? 0 : 1;
2302 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2306 if (DstBank != &AMDGPU::SGPRRegBank)
2309 const bool HasCarryIn =
MI.getNumOperands() == 5;
2315 MRI.
setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2316 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2320 MRI.
setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2321 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2322 MI.getOperand(4).setReg(NewSrcReg);
2326 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2331 if (DefRegs.
empty())
2333 B.buildTrunc(DefRegs[0], NewDstReg);
2336 case AMDGPU::G_SELECT: {
2341 if (CondRegs.
empty())
2348 if (CondBank == &AMDGPU::SGPRRegBank) {
2351 MRI.
setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2353 MI.getOperand(1).setReg(NewCondReg);
2354 B.buildZExt(NewCondReg, CondRegs[0]);
2367 if (DefRegs.
empty()) {
2372 if (Src1Regs.
empty())
2378 if (Src2Regs.
empty())
2385 auto Flags =
MI.getFlags();
2386 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0], Flags);
2387 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1], Flags);
2390 MI.eraseFromParent();
2393 case AMDGPU::G_BRCOND: {
2394 Register CondReg =
MI.getOperand(0).getReg();
2399 if (CondBank == &AMDGPU::SGPRRegBank) {
2402 MRI.
setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2404 MI.getOperand(0).setReg(NewCondReg);
2405 B.buildZExt(NewCondReg, CondReg);
2413 case AMDGPU::G_XOR: {
2423 if (DstBank == &AMDGPU::VCCRegBank)
2427 ApplyRegBankMapping ApplyBank(
B, *
this, MRI, DstBank);
2436 if (DstTy.
getSizeInBits() == 16 && DstBank == &AMDGPU::SGPRRegBank) {
2440 ApplyRegBankMapping ApplySALU(
B, *
this, MRI, &AMDGPU::SGPRRegBank);
2445 if (
MI.getOpcode() == AMDGPU::G_XOR &&
2466 if (DefRegs.
empty()) {
2473 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2479 if (Src0Regs.
empty())
2484 if (Src1Regs.
empty())
2491 auto Flags =
MI.getFlags();
2492 B.buildInstr(
Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]}, Flags);
2493 B.buildInstr(
Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]}, Flags);
2496 MI.eraseFromParent();
2499 case AMDGPU::G_ABS: {
2505 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2507 ApplyRegBankMapping Apply(
B, *
this, MRI, &AMDGPU::VGPRRegBank);
2520 case AMDGPU::G_LSHR:
2521 case AMDGPU::G_ASHR:
2522 case AMDGPU::G_SMIN:
2523 case AMDGPU::G_SMAX:
2524 case AMDGPU::G_UMIN:
2525 case AMDGPU::G_UMAX: {
2532 if (!
Subtarget.hasVMulU64Inst() &&
Opc == AMDGPU::G_MUL &&
2545 if (DstBank == &AMDGPU::VGPRRegBank)
2551 ApplyRegBankMapping ApplySALU(
B, *
this, MRI, &AMDGPU::SGPRRegBank);
2556 std::tie(WideSrcLo, WideSrcHi) =
2558 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcLo});
2559 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcHi});
2560 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2561 MI.eraseFromParent();
2570 std::tie(WideSrc0Lo, WideSrc0Hi)
2572 std::tie(WideSrc1Lo, WideSrc1Hi)
2574 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2575 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2576 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2577 MI.eraseFromParent();
2585 if (
Opc == AMDGPU::G_SHL ||
Opc == AMDGPU::G_LSHR ||
2586 Opc == AMDGPU::G_ASHR) {
2587 B.setInsertPt(*
MBB,
MI.getIterator());
2595 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
2596 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
2610 Register SrcReg0 =
MI.getOperand(1).getReg();
2611 Register SrcReg1 =
MI.getOperand(2).getReg();
2614 assert(MRI.
getType(DstReg) ==
S64 &&
"This is a special case for s_mul_u64 "
2615 "that handles only 64-bit operands.");
2621 if (DstBank == &AMDGPU::SGPRRegBank) {
2622 MI.setDesc(
TII->get(AMDGPU::S_MUL_U64));
2623 MRI.
setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
2624 MRI.
setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
2625 MRI.
setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
2632 "The destination operand should be in vector registers.");
2638 B.buildTrunc(Op0L, SrcReg0);
2644 B.buildTrunc(Op1L, SrcReg1);
2646 unsigned NewOpc =
Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
2647 ? AMDGPU::G_AMDGPU_MAD_U64_U32
2648 : AMDGPU::G_AMDGPU_MAD_I64_I32;
2652 MRI.
setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
2654 MRI.
setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
2655 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
2656 MI.eraseFromParent();
2659 case AMDGPU::G_SEXT_INREG: {
2661 if (SrcRegs.
empty())
2665 ApplyRegBankMapping O(
B, *
this, MRI, &AMDGPU::VGPRRegBank);
2672 int Amt =
MI.getOperand(2).getImm();
2678 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2680 auto Freeze =
B.buildFreeze(
S32, SrcRegs[0]);
2682 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2685 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(
S32, 31));
2689 B.buildCopy(DstRegs[0], SrcRegs[0]);
2690 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2695 MI.eraseFromParent();
2698 case AMDGPU::G_CTPOP:
2699 case AMDGPU::G_BITREVERSE: {
2702 if (DstBank == &AMDGPU::SGPRRegBank)
2711 ApplyRegBankMapping ApplyVALU(
B, *
this, MRI, &AMDGPU::VGPRRegBank);
2720 case AMDGPU::G_AMDGPU_FFBH_U32:
2721 case AMDGPU::G_AMDGPU_FFBL_B32:
2722 case AMDGPU::G_CTLZ_ZERO_POISON:
2723 case AMDGPU::G_CTTZ_ZERO_POISON: {
2726 if (DstBank == &AMDGPU::SGPRRegBank)
2741 ApplyRegBankMapping ApplyVALU(
B, *
this, MRI, &AMDGPU::VGPRRegBank);
2743 unsigned NewOpc =
Opc == AMDGPU::G_CTLZ_ZERO_POISON
2744 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2745 :
Opc == AMDGPU::G_CTTZ_ZERO_POISON
2746 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2748 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2749 auto X =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[Idx]});
2750 auto Y =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[Idx ^ 1]});
2752 Opc == AMDGPU::G_CTLZ_ZERO_POISON ||
Opc == AMDGPU::G_CTTZ_ZERO_POISON
2754 : AMDGPU::G_UADDSAT;
2755 Y =
B.buildInstr(AddOpc, {
S32}, {
Y,
B.buildConstant(
S32, 32)});
2757 B.buildUMin(DstReg,
X,
Y);
2758 MI.eraseFromParent();
2761 case AMDGPU::G_SEXT:
2762 case AMDGPU::G_ZEXT:
2763 case AMDGPU::G_ANYEXT: {
2766 const bool Signed =
Opc == AMDGPU::G_SEXT;
2776 SrcBank != &AMDGPU::SGPRRegBank &&
2777 SrcBank != &AMDGPU::VCCRegBank &&
2781 SrcTy.getSizeInBits() <= 32) {
2787 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2788 }
else if (
Opc == AMDGPU::G_ZEXT) {
2789 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2791 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2796 MI.eraseFromParent();
2806 if (SrcBank == &AMDGPU::VCCRegBank) {
2813 const bool UseSel64 = DstSize > 32 &&
2814 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2818 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2819 auto False =
B.buildConstant(SelType, 0);
2826 B.buildSelect(DefRegs[0], SrcReg, True, False);
2828 }
else if (DstSize < 32) {
2829 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2831 B.buildTrunc(DstReg, Sel);
2833 B.buildSelect(DstReg, SrcReg, True, False);
2836 MI.eraseFromParent();
2842 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2854 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2866 unsigned ConstOffset;
2867 std::tie(BaseIdxReg, ConstOffset) =
2874 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2876 ConstOffset < SrcTy.getNumElements();
2879 if (ShouldMoveIndexIntoLoop)
2880 MI.getOperand(2).setReg(BaseIdxReg);
2886 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2887 SrcBank == &AMDGPU::SGPRRegBank;
2888 if (DstRegs.
empty()) {
2893 if (NeedCopyToVGPR) {
2897 MI.getOperand(0).setReg(TmpReg);
2898 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2905 if (ShouldMoveIndexIntoLoop)
2915 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2916 auto One =
B.buildConstant(
S32, 1);
2927 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2928 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2930 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2931 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2935 MRI.
setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2936 MRI.
setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2937 MRI.
setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2941 MI.eraseFromParent();
2947 B.setInstr(*Span.
begin());
2948 MI.eraseFromParent();
2952 if (NeedCopyToVGPR) {
2956 MRI.
setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2957 MRI.
setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2959 Extract0->getOperand(0).setReg(TmpReg0);
2960 Extract1->getOperand(0).setReg(TmpReg1);
2968 if (ShouldMoveIndexIntoLoop)
2973 case AMDGPU::G_INSERT_VECTOR_ELT: {
2983 MRI.
setType(
MI.getOperand(1).getReg(), VecTy);
2985 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2997 unsigned ConstOffset;
2998 std::tie(BaseIdxReg, ConstOffset) =
3005 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
3010 if (ShouldMoveIndexIntoLoop)
3011 MI.getOperand(3).setReg(BaseIdxReg);
3014 if (InsRegs.
empty()) {
3018 if (ShouldMoveIndexIntoLoop) {
3030 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
3031 auto One =
B.buildConstant(
S32, 1);
3040 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
3041 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
3043 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
3044 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3057 MRI.
setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3058 MRI.
setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3059 MRI.
setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3064 B.setInsertPt(
B.getMBB(),
MI);
3065 B.buildBitcast(DstReg, InsHi);
3066 MI.eraseFromParent();
3070 B.setInstr(*Span.
begin());
3071 MI.eraseFromParent();
3082 B.buildBitcast(DstReg, InsHi);
3085 if (ShouldMoveIndexIntoLoop)
3090 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3091 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3092 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3093 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3094 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3095 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3096 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3097 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3098 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3099 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
3100 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3101 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3102 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3103 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3104 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3105 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3106 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3107 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3108 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3109 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3110 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3111 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3116 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3117 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3118 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3119 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3120 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3121 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3122 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3123 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3124 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3125 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3126 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3127 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
3128 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32:
3129 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32:
3130 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3131 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3132 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3137 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3142 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3143 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3144 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3145 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3146 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3150 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
3154 case AMDGPU::G_INTRINSIC:
3155 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3157 case Intrinsic::amdgcn_readlane: {
3168 case Intrinsic::amdgcn_writelane: {
3178 case Intrinsic::amdgcn_interp_p1:
3179 case Intrinsic::amdgcn_interp_p2:
3180 case Intrinsic::amdgcn_interp_mov:
3181 case Intrinsic::amdgcn_interp_p1_f16:
3182 case Intrinsic::amdgcn_interp_p2_f16:
3183 case Intrinsic::amdgcn_lds_param_load: {
3191 case Intrinsic::amdgcn_interp_inreg_p10:
3192 case Intrinsic::amdgcn_interp_inreg_p2:
3193 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3194 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3195 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3196 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3197 case Intrinsic::amdgcn_permlane16_swap:
3198 case Intrinsic::amdgcn_permlane32_swap:
3201 case Intrinsic::amdgcn_permlane16:
3202 case Intrinsic::amdgcn_permlanex16: {
3210 case Intrinsic::amdgcn_permlane_bcast:
3211 case Intrinsic::amdgcn_permlane_up:
3212 case Intrinsic::amdgcn_permlane_down:
3213 case Intrinsic::amdgcn_permlane_xor:
3218 case Intrinsic::amdgcn_permlane_idx_gen: {
3222 case Intrinsic::amdgcn_sbfe:
3225 case Intrinsic::amdgcn_ubfe:
3228 case Intrinsic::amdgcn_inverse_ballot:
3229 case Intrinsic::amdgcn_s_bitreplicate:
3230 case Intrinsic::amdgcn_s_quadmask:
3231 case Intrinsic::amdgcn_s_wqm:
3235 case Intrinsic::amdgcn_ballot:
3241 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3242 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3243 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
3244 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3245 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3255 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3256 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
3257 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3259 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
3260 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
3261 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
3262 unsigned LastRegOpIdx =
MI.getNumExplicitOperands() - 1 - NumMods;
3267 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3268 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3271 case Intrinsic::amdgcn_ds_ordered_add:
3272 case Intrinsic::amdgcn_ds_ordered_swap: {
3279 case Intrinsic::amdgcn_ds_gws_init:
3280 case Intrinsic::amdgcn_ds_gws_barrier:
3281 case Intrinsic::amdgcn_ds_gws_sema_br: {
3287 case Intrinsic::amdgcn_ds_gws_sema_v:
3288 case Intrinsic::amdgcn_ds_gws_sema_p:
3289 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3294 case Intrinsic::amdgcn_ds_append:
3295 case Intrinsic::amdgcn_ds_consume: {
3299 case Intrinsic::amdgcn_s_alloc_vgpr:
3302 case Intrinsic::amdgcn_s_sendmsg:
3303 case Intrinsic::amdgcn_s_sendmsghalt: {
3308 case Intrinsic::amdgcn_s_setreg: {
3312 case Intrinsic::amdgcn_s_ttracedata:
3315 case Intrinsic::amdgcn_raw_buffer_load_lds:
3316 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3317 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
3318 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds: {
3325 case Intrinsic::amdgcn_struct_buffer_load_lds:
3326 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3327 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
3328 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: {
3335 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:
3336 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
3337 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
3338 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
3343 case Intrinsic::amdgcn_load_to_lds:
3344 case Intrinsic::amdgcn_load_async_to_lds:
3345 case Intrinsic::amdgcn_global_load_lds:
3346 case Intrinsic::amdgcn_global_load_async_lds: {
3351 case Intrinsic::amdgcn_lds_direct_load: {
3357 case Intrinsic::amdgcn_exp_row:
3361 case Intrinsic::amdgcn_cluster_load_b32:
3362 case Intrinsic::amdgcn_cluster_load_b64:
3363 case Intrinsic::amdgcn_cluster_load_b128: {
3368 case Intrinsic::amdgcn_s_sleep_var:
3372 case Intrinsic::amdgcn_s_barrier_join:
3373 case Intrinsic::amdgcn_s_wakeup_barrier:
3376 case Intrinsic::amdgcn_s_barrier_init:
3377 case Intrinsic::amdgcn_s_barrier_signal_var:
3381 case Intrinsic::amdgcn_s_get_barrier_state:
3382 case Intrinsic::amdgcn_s_get_named_barrier_state: {
3386 case Intrinsic::amdgcn_s_prefetch_data:
3387 case Intrinsic::amdgcn_s_prefetch_inst: {
3394 MI.eraseFromParent();
3397 case Intrinsic::amdgcn_tensor_load_to_lds:
3398 case Intrinsic::amdgcn_tensor_store_from_lds: {
3412 if (RSrcIntrin->IsImage) {
3423 case AMDGPU::G_SI_CALL: {
3434 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3435 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3441 unsigned NonCopyInstrsLen = 0;
3447 while (Start->getOpcode() != FrameSetupOpcode) {
3449 bool IsCopy =
false;
3450 if (Start->getOpcode() == AMDGPU::COPY) {
3451 auto &Dst = Start->getOperand(0);
3454 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3459 auto &Src = Start->getOperand(1);
3462 IsCopy = Info->getScratchRSrcReg() == Reg;
3470 NonCopyInstrsLen = NonCopyInstrs.
size();
3475 NonCopyInstrs.
resize(NonCopyInstrsLen);
3477 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3478 MBB->splice(LastCopy,
MBB, NonCopy->getIterator());
3483 NonCopyInstrs.
clear();
3484 NonCopyInstrsLen = 0;
3487 while (End->getOpcode() != FrameDestroyOpcode) {
3489 bool IsCopy =
false;
3490 if (End->getOpcode() == AMDGPU::COPY) {
3491 auto &Src = End->getOperand(1);
3494 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3500 NonCopyInstrsLen = NonCopyInstrs.
size();
3505 NonCopyInstrs.
resize(NonCopyInstrsLen);
3509 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3510 MBB->splice(LastCopy,
MBB, NonCopy->getIterator());
3514 B.setInsertPt(
B.getMBB(), Start);
3518 case AMDGPU::G_AMDGPU_FLAT_LOAD_MONITOR:
3519 case AMDGPU::G_AMDGPU_GLOBAL_LOAD_MONITOR:
3520 case AMDGPU::G_LOAD:
3521 case AMDGPU::G_ZEXTLOAD:
3522 case AMDGPU::G_SEXTLOAD: {
3527 case AMDGPU::G_DYN_STACKALLOC:
3530 case AMDGPU::G_STACKRESTORE: {
3535 case AMDGPU::G_SBFX:
3538 case AMDGPU::G_UBFX:
3541 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3542 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3545 case AMDGPU::G_PREFETCH: {
3547 MI.eraseFromParent();
3551 unsigned PtrBank =
getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID);
3552 if (PtrBank == AMDGPU::VGPRRegBankID &&
3553 (!
Subtarget.hasVmemPrefInsts() || !
MI.getOperand(3).getImm())) {
3555 MI.eraseFromParent();
3563 !
MI.getOperand(3).getImm() ))) {
3564 MI.eraseFromParent();
3861 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3876 DstBank = &AMDGPU::VCCRegBank;
3879 DstBank = &AMDGPU::VCCRegBank;
3890 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3895 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3897 OpdsMapping[0] = &ValMap;
3898 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3899 OpdsMapping[1] = &ValMap;
3906 if (
MI.isRegSequence()) {
3909 unsigned BankID = AMDGPU::SGPRRegBankID;
3911 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
3915 if (OpBank != AMDGPU::SGPRRegBankID) {
3916 BankID = AMDGPU::VGPRRegBankID;
3933 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3938 ResultBank = DstBank->
getID();
3940 for (
unsigned I = 0;
I <
PHI->getNumIncomingValues(); ++
I) {
3945 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3946 ResultBank = AMDGPU::VGPRRegBankID;
3951 unsigned OpBank = Bank->
getID();
3955 assert(ResultBank != AMDGPU::InvalidRegBankID);
3972 switch (
MI.getOpcode()) {
3979 case AMDGPU::G_MUL: {
3985 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3986 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3987 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3989 TargetBankID = DstBank->
getID();
3990 if (DstBank == &AMDGPU::VCCRegBank) {
3991 TargetBankID = AMDGPU::VCCRegBankID;
3992 BankLHS = AMDGPU::VCCRegBankID;
3993 BankRHS = AMDGPU::VCCRegBankID;
3996 AMDGPU::SGPRRegBankID);
3998 AMDGPU::SGPRRegBankID);
4002 AMDGPU::VCCRegBankID);
4004 AMDGPU::VCCRegBankID);
4007 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
4008 TargetBankID = AMDGPU::VGPRRegBankID;
4009 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
4010 TargetBankID = AMDGPU::VCCRegBankID;
4011 BankLHS = AMDGPU::VCCRegBankID;
4012 BankRHS = AMDGPU::VCCRegBankID;
4013 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
4014 TargetBankID = AMDGPU::SGPRRegBankID;
4018 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
4019 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
4020 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
4027 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
4028 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
4030 if (
MI.getOpcode() == AMDGPU::G_MUL &&
Subtarget.hasVMulU64Inst())
4031 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4034 getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
4036 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
4039 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
4047 case AMDGPU::G_PTR_ADD:
4048 case AMDGPU::G_PTRMASK:
4052 case AMDGPU::G_LSHR:
4053 case AMDGPU::G_ASHR:
4054 case AMDGPU::G_UADDO:
4055 case AMDGPU::G_USUBO:
4056 case AMDGPU::G_UADDE:
4057 case AMDGPU::G_SADDE:
4058 case AMDGPU::G_USUBE:
4059 case AMDGPU::G_SSUBE:
4061 case AMDGPU::G_SHUFFLE_VECTOR:
4062 case AMDGPU::G_SBFX:
4063 case AMDGPU::G_UBFX:
4064 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
4065 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
4068 unsigned Size = Ty.getSizeInBits();
4070 if (
Subtarget.hasPackedU64Ops() && Ty.isVector() &&
Size == 128)
4075 case AMDGPU::G_SMIN:
4076 case AMDGPU::G_SMAX:
4077 case AMDGPU::G_UMIN:
4078 case AMDGPU::G_UMAX:
4087 case AMDGPU::G_FADD:
4088 case AMDGPU::G_FSUB:
4089 case AMDGPU::G_FMUL:
4091 case AMDGPU::G_FFLOOR:
4092 case AMDGPU::G_FCEIL:
4093 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
4094 case AMDGPU::G_FMINNUM:
4095 case AMDGPU::G_FMAXNUM:
4096 case AMDGPU::G_FMINIMUMNUM:
4097 case AMDGPU::G_FMAXIMUMNUM:
4098 case AMDGPU::G_INTRINSIC_TRUNC:
4099 case AMDGPU::G_STRICT_FADD:
4100 case AMDGPU::G_STRICT_FSUB:
4101 case AMDGPU::G_STRICT_FMUL:
4102 case AMDGPU::G_STRICT_FMA: {
4104 unsigned Size = Ty.getSizeInBits();
4105 if (
Subtarget.hasSALUFloatInsts() && Ty.isScalar() &&
4110 case AMDGPU::G_FMINIMUM:
4111 case AMDGPU::G_FMAXIMUM: {
4113 unsigned Size = Ty.getSizeInBits();
4114 if (
Subtarget.hasSALUMinimumMaximumInsts() && Ty.isScalar() &&
4119 case AMDGPU::G_FPTOSI:
4120 case AMDGPU::G_FPTOUI:
4121 case AMDGPU::G_FPTOSI_SAT:
4122 case AMDGPU::G_FPTOUI_SAT:
4123 case AMDGPU::G_SITOFP:
4124 case AMDGPU::G_UITOFP: {
4127 if (
Subtarget.hasSALUFloatInsts() && SizeDst == 32 && SizeSrc == 32 &&
4132 case AMDGPU::G_FPTRUNC:
4133 case AMDGPU::G_FPEXT: {
4136 if (
Subtarget.hasSALUFloatInsts() && SizeDst != 64 && SizeSrc != 64 &&
4141 case AMDGPU::G_FSQRT:
4142 case AMDGPU::G_FEXP2:
4143 case AMDGPU::G_FLOG2: {
4150 case AMDGPU::G_SADDSAT:
4151 case AMDGPU::G_SSUBSAT:
4152 case AMDGPU::G_UADDSAT:
4153 case AMDGPU::G_USUBSAT:
4154 case AMDGPU::G_FMAD:
4155 case AMDGPU::G_FLDEXP:
4156 case AMDGPU::G_FMINNUM_IEEE:
4157 case AMDGPU::G_FMAXNUM_IEEE:
4158 case AMDGPU::G_FCANONICALIZE:
4159 case AMDGPU::G_STRICT_FLDEXP:
4160 case AMDGPU::G_BSWAP:
4161 case AMDGPU::G_FSHR:
4162 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
4163 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
4164 case AMDGPU::G_AMDGPU_RCP_IFLAG:
4165 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
4166 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
4167 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
4168 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
4169 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
4170 case AMDGPU::G_AMDGPU_SMED3:
4171 case AMDGPU::G_AMDGPU_FMED3:
4173 case AMDGPU::G_UMULH:
4174 case AMDGPU::G_SMULH: {
4179 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4180 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4189 bool AllSalu =
true;
4190 bool MulSalu =
true;
4191 for (
unsigned i = 0; i < 5; ++i) {
4194 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4196 if (i == 2 || i == 3) {
4210 if (!MulSalu ||
Subtarget.hasFullRate64Ops())
4214 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4215 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4216 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4217 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4218 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4221 case AMDGPU::G_IMPLICIT_DEF: {
4223 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4226 case AMDGPU::G_FCONSTANT:
4227 case AMDGPU::G_CONSTANT:
4228 case AMDGPU::G_GLOBAL_VALUE:
4229 case AMDGPU::G_FRAME_INDEX:
4230 case AMDGPU::G_BLOCK_ADDR:
4231 case AMDGPU::G_READSTEADYCOUNTER:
4232 case AMDGPU::G_READCYCLECOUNTER: {
4234 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4237 case AMDGPU::G_DYN_STACKALLOC: {
4239 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4240 unsigned SrcBankID =
getRegBankID(
MI.getOperand(1).getReg(), MRI);
4241 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4244 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4249 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4250 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4253 case AMDGPU::G_INSERT: {
4258 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4259 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4260 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4261 OpdsMapping[3] =
nullptr;
4264 case AMDGPU::G_EXTRACT: {
4268 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4269 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4270 OpdsMapping[2] =
nullptr;
4273 case AMDGPU::G_BUILD_VECTOR:
4274 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4279 unsigned Src0BankID =
getRegBankID(
MI.getOperand(1).getReg(), MRI);
4280 unsigned Src1BankID =
getRegBankID(
MI.getOperand(2).getReg(), MRI);
4281 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
4283 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4284 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4285 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4291 case AMDGPU::G_MERGE_VALUES:
4292 case AMDGPU::G_CONCAT_VECTORS: {
4297 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4299 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
4300 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4303 case AMDGPU::G_BITREVERSE:
4304 case AMDGPU::G_BITCAST:
4305 case AMDGPU::G_INTTOPTR:
4306 case AMDGPU::G_PTRTOINT:
4307 case AMDGPU::G_FABS:
4308 case AMDGPU::G_FNEG: {
4311 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4314 case AMDGPU::G_AMDGPU_FFBH_U32:
4315 case AMDGPU::G_AMDGPU_FFBL_B32:
4316 case AMDGPU::G_CTLZ_ZERO_POISON:
4317 case AMDGPU::G_CTTZ_ZERO_POISON: {
4320 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4321 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
4324 case AMDGPU::G_CTPOP: {
4327 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4332 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4335 case AMDGPU::G_TRUNC: {
4341 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4342 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4345 case AMDGPU::G_ZEXT:
4346 case AMDGPU::G_SEXT:
4347 case AMDGPU::G_ANYEXT:
4348 case AMDGPU::G_SEXT_INREG: {
4357 switch (SrcBank->
getID()) {
4358 case AMDGPU::SGPRRegBankID:
4359 DstBank = AMDGPU::SGPRRegBankID;
4362 DstBank = AMDGPU::VGPRRegBankID;
4368 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4369 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
4373 case AMDGPU::G_IS_FPCLASS: {
4377 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4378 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4381 case AMDGPU::G_STORE: {
4388 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4389 OpdsMapping[0] = ValMapping;
4393 case AMDGPU::G_ICMP:
4394 case AMDGPU::G_FCMP: {
4400 AMDGPU::SGPRRegBankID);
4404 auto canUseSCCICMP = [&]() {
4407 return Size == 32 ||
4412 auto canUseSCCFCMP = [&]() {
4416 bool isICMP =
MI.getOpcode() == AMDGPU::G_ICMP;
4417 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4418 Op2Bank == AMDGPU::SGPRRegBankID &&
4419 Op3Bank == AMDGPU::SGPRRegBankID &&
4420 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4422 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4423 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4427 const unsigned ResultSize = 1;
4429 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4430 OpdsMapping[1] =
nullptr;
4431 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4432 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4435 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4437 unsigned SrcBankID =
getRegBankID(
MI.getOperand(1).getReg(), MRI);
4442 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4444 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4445 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4448 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4451 case AMDGPU::G_INSERT_VECTOR_ELT: {
4453 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4458 unsigned InsertEltBankID =
getRegBankID(
MI.getOperand(2).getReg(), MRI);
4459 unsigned IdxBankID =
getRegBankID(
MI.getOperand(3).getReg(), MRI);
4461 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4462 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4466 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4467 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4470 assert(InsertSize == 32 || InsertSize == 64);
4471 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4475 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4478 case AMDGPU::G_UNMERGE_VALUES: {
4483 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4485 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4489 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4490 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4491 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4492 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4493 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4494 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4495 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4496 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4497 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4498 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
4499 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4500 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4501 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4502 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4503 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4504 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4505 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4506 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4507 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4508 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4509 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4510 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4529 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4530 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4531 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4532 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4533 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4534 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4535 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4536 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4537 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4538 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4539 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4540 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4541 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32:
4542 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32:
4543 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4544 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4545 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4568 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4594 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4595 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4596 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4597 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4598 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4606 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4607 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4608 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4611 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4614 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
4618 case AMDGPU::G_AMDGPU_SPONENTRY: {
4620 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4623 case AMDGPU::G_INTRINSIC:
4624 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4628 case Intrinsic::amdgcn_div_fmas:
4629 case Intrinsic::amdgcn_div_fixup:
4630 case Intrinsic::amdgcn_trig_preop:
4631 case Intrinsic::amdgcn_sin:
4632 case Intrinsic::amdgcn_cos:
4633 case Intrinsic::amdgcn_log_clamp:
4634 case Intrinsic::amdgcn_rcp_legacy:
4635 case Intrinsic::amdgcn_rsq_legacy:
4636 case Intrinsic::amdgcn_rsq_clamp:
4637 case Intrinsic::amdgcn_tanh:
4638 case Intrinsic::amdgcn_fmul_legacy:
4639 case Intrinsic::amdgcn_fma_legacy:
4640 case Intrinsic::amdgcn_frexp_mant:
4641 case Intrinsic::amdgcn_frexp_exp:
4642 case Intrinsic::amdgcn_fract:
4643 case Intrinsic::amdgcn_cvt_pknorm_i16:
4644 case Intrinsic::amdgcn_cvt_pknorm_u16:
4645 case Intrinsic::amdgcn_cvt_pk_i16:
4646 case Intrinsic::amdgcn_cvt_pk_u16:
4647 case Intrinsic::amdgcn_cvt_sr_pk_f16_f32:
4648 case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
4649 case Intrinsic::amdgcn_cvt_pk_f16_fp8:
4650 case Intrinsic::amdgcn_cvt_pk_f16_bf8:
4651 case Intrinsic::amdgcn_cvt_pk_fp8_f16:
4652 case Intrinsic::amdgcn_cvt_pk_bf8_f16:
4653 case Intrinsic::amdgcn_cvt_sr_fp8_f16:
4654 case Intrinsic::amdgcn_cvt_sr_bf8_f16:
4655 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp8:
4656 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp8:
4657 case Intrinsic::amdgcn_cvt_scale_pk8_f16_bf8:
4658 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_bf8:
4659 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp4:
4660 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp4:
4661 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:
4662 case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:
4663 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:
4664 case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:
4665 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:
4666 case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:
4667 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:
4668 case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:
4669 case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:
4670 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:
4671 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:
4672 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:
4673 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16:
4674 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32:
4675 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32:
4676 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32:
4677 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16:
4678 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16:
4679 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f32:
4680 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f32:
4681 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f16:
4682 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f16:
4683 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_bf16:
4684 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_bf16:
4685 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_bf16:
4686 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_bf16:
4687 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f16:
4688 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f16:
4689 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f32:
4690 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f32:
4691 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f32:
4692 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f16:
4693 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_bf16:
4694 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f32:
4695 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f32:
4696 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f16:
4697 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f16:
4698 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_bf16:
4699 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:
4700 case Intrinsic::amdgcn_sat_pk4_i4_i8:
4701 case Intrinsic::amdgcn_sat_pk4_u4_u8:
4702 case Intrinsic::amdgcn_fmed3:
4703 case Intrinsic::amdgcn_cubeid:
4704 case Intrinsic::amdgcn_cubema:
4705 case Intrinsic::amdgcn_cubesc:
4706 case Intrinsic::amdgcn_cubetc:
4707 case Intrinsic::amdgcn_sffbh:
4708 case Intrinsic::amdgcn_fmad_ftz:
4709 case Intrinsic::amdgcn_mbcnt_lo:
4710 case Intrinsic::amdgcn_mbcnt_hi:
4711 case Intrinsic::amdgcn_mul_u24:
4712 case Intrinsic::amdgcn_mul_i24:
4713 case Intrinsic::amdgcn_mulhi_u24:
4714 case Intrinsic::amdgcn_mulhi_i24:
4715 case Intrinsic::amdgcn_lerp:
4716 case Intrinsic::amdgcn_sad_u8:
4717 case Intrinsic::amdgcn_msad_u8:
4718 case Intrinsic::amdgcn_sad_hi_u8:
4719 case Intrinsic::amdgcn_sad_u16:
4720 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4721 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4722 case Intrinsic::amdgcn_mqsad_u32_u8:
4723 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4724 case Intrinsic::amdgcn_alignbyte:
4725 case Intrinsic::amdgcn_perm:
4726 case Intrinsic::amdgcn_prng_b32:
4727 case Intrinsic::amdgcn_fdot2:
4728 case Intrinsic::amdgcn_sdot2:
4729 case Intrinsic::amdgcn_udot2:
4730 case Intrinsic::amdgcn_sdot4:
4731 case Intrinsic::amdgcn_udot4:
4732 case Intrinsic::amdgcn_sdot8:
4733 case Intrinsic::amdgcn_udot8:
4734 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4735 case Intrinsic::amdgcn_fdot2_f16_f16:
4736 case Intrinsic::amdgcn_fdot2_f32_bf16:
4737 case Intrinsic::amdgcn_fdot2c_f32_bf16:
4738 case Intrinsic::amdgcn_sudot4:
4739 case Intrinsic::amdgcn_sudot8:
4740 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4741 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4742 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4743 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4744 case Intrinsic::amdgcn_cvt_f32_fp8:
4745 case Intrinsic::amdgcn_cvt_f32_fp8_e5m3:
4746 case Intrinsic::amdgcn_cvt_f32_bf8:
4747 case Intrinsic::amdgcn_cvt_off_f32_i4:
4748 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4749 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4750 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4751 case Intrinsic::amdgcn_cvt_pk_fp8_f32_e5m3:
4752 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4753 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4754 case Intrinsic::amdgcn_cvt_sr_fp8_f32_e5m3:
4755 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4756 case Intrinsic::amdgcn_cvt_sr_bf16_f32:
4757 case Intrinsic::amdgcn_cvt_sr_f16_f32:
4758 case Intrinsic::amdgcn_cvt_f16_fp8:
4759 case Intrinsic::amdgcn_cvt_f16_bf8:
4760 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
4761 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
4762 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:
4763 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16:
4764 case Intrinsic::amdgcn_cvt_scalef32_f16_fp8:
4765 case Intrinsic::amdgcn_cvt_scalef32_f16_bf8:
4766 case Intrinsic::amdgcn_cvt_scalef32_f32_fp8:
4767 case Intrinsic::amdgcn_cvt_scalef32_f32_bf8:
4768 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f32:
4769 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f32:
4770 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp8:
4771 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_bf8:
4772 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f16:
4773 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_bf16:
4774 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f16:
4775 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_bf16:
4776 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp4:
4777 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f32:
4778 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp4:
4779 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp4:
4780 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_fp6:
4781 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_bf6:
4782 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_bf6:
4783 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_bf6:
4784 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_fp6:
4785 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_fp6:
4786 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_bf8:
4787 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_bf8:
4788 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp8:
4789 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp8:
4790 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f16:
4791 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_bf16:
4792 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
4793 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
4794 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
4795 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
4796 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
4797 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
4798 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
4799 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
4800 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
4801 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_bf16:
4802 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f16:
4803 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f32:
4804 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_bf16:
4805 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f16:
4806 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f32:
4807 case Intrinsic::amdgcn_ashr_pk_i8_i32:
4808 case Intrinsic::amdgcn_ashr_pk_u8_i32:
4809 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:
4810 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32:
4811 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4812 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4813 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4814 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4815 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4816 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4817 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4818 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4819 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4820 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4821 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4822 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4823 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4824 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4825 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4826 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4827 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4828 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4829 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4830 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4831 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4832 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4833 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4834 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4835 case Intrinsic::amdgcn_wmma_f64_16x16x4_f64:
4836 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4837 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4838 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4839 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4840 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4841 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
4842 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8:
4843 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8:
4844 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8:
4845 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8:
4846 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8:
4847 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8:
4848 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8:
4849 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8:
4850 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8:
4851 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8:
4852 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8:
4853 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8:
4854 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8:
4855 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8:
4856 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8:
4857 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8:
4858 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
4859 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
4860 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
4861 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
4862 case Intrinsic::amdgcn_wmma_f32_32x16x128_f4:
4863 case Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4:
4864 case Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4:
4865 case Intrinsic::amdgcn_swmmac_f16_16x16x64_f16:
4866 case Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16:
4867 case Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16:
4868 case Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16:
4869 case Intrinsic::amdgcn_swmmac_f32_16x16x64_f16:
4870 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8:
4871 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8:
4872 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8:
4873 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8:
4874 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8:
4875 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8:
4876 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8:
4877 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8:
4878 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
4879 case Intrinsic::amdgcn_perm_pk16_b4_u4:
4880 case Intrinsic::amdgcn_perm_pk16_b6_u4:
4881 case Intrinsic::amdgcn_perm_pk16_b8_u4:
4882 case Intrinsic::amdgcn_add_max_i32:
4883 case Intrinsic::amdgcn_add_max_u32:
4884 case Intrinsic::amdgcn_add_min_i32:
4885 case Intrinsic::amdgcn_add_min_u32:
4886 case Intrinsic::amdgcn_pk_add_max_i16:
4887 case Intrinsic::amdgcn_pk_add_max_u16:
4888 case Intrinsic::amdgcn_pk_add_min_i16:
4889 case Intrinsic::amdgcn_pk_add_min_u16:
4891 case Intrinsic::amdgcn_log:
4892 case Intrinsic::amdgcn_exp2:
4893 case Intrinsic::amdgcn_rcp:
4894 case Intrinsic::amdgcn_rsq:
4895 case Intrinsic::amdgcn_sqrt: {
4902 case Intrinsic::amdgcn_sbfe:
4903 case Intrinsic::amdgcn_ubfe:
4907 case Intrinsic::amdgcn_ds_swizzle:
4908 case Intrinsic::amdgcn_ds_permute:
4909 case Intrinsic::amdgcn_ds_bpermute:
4910 case Intrinsic::amdgcn_update_dpp:
4911 case Intrinsic::amdgcn_mov_dpp8:
4912 case Intrinsic::amdgcn_mov_dpp:
4913 case Intrinsic::amdgcn_strict_wwm:
4914 case Intrinsic::amdgcn_wwm:
4915 case Intrinsic::amdgcn_strict_wqm:
4916 case Intrinsic::amdgcn_wqm:
4917 case Intrinsic::amdgcn_softwqm:
4918 case Intrinsic::amdgcn_set_inactive:
4919 case Intrinsic::amdgcn_set_inactive_chain_arg:
4920 case Intrinsic::amdgcn_permlane64:
4921 case Intrinsic::amdgcn_ds_bpermute_fi_b32:
4923 case Intrinsic::amdgcn_cvt_pkrtz:
4927 case Intrinsic::amdgcn_kernarg_segment_ptr:
4928 case Intrinsic::amdgcn_s_getpc:
4929 case Intrinsic::amdgcn_groupstaticsize:
4930 case Intrinsic::amdgcn_reloc_constant:
4931 case Intrinsic::returnaddress: {
4933 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4936 case Intrinsic::amdgcn_wqm_vote: {
4938 OpdsMapping[0] = OpdsMapping[2]
4939 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4942 case Intrinsic::amdgcn_ps_live: {
4943 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4946 case Intrinsic::amdgcn_div_scale: {
4949 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4950 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4953 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4954 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4957 case Intrinsic::amdgcn_class: {
4958 Register Src0Reg =
MI.getOperand(2).getReg();
4959 Register Src1Reg =
MI.getOperand(3).getReg();
4963 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4964 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4965 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4968 case Intrinsic::amdgcn_icmp:
4969 case Intrinsic::amdgcn_fcmp: {
4972 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4974 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4975 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4978 case Intrinsic::amdgcn_readlane: {
4982 unsigned IdxBank =
getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
4983 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4986 case Intrinsic::amdgcn_readfirstlane: {
4989 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4990 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4993 case Intrinsic::amdgcn_writelane: {
4997 unsigned SrcBank =
getRegBankID(SrcReg, MRI, AMDGPU::SGPRRegBankID);
5000 unsigned IdxBank =
getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
5001 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5005 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
5006 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
5007 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
5010 case Intrinsic::amdgcn_if_break: {
5012 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5013 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5014 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5017 case Intrinsic::amdgcn_permlane16:
5018 case Intrinsic::amdgcn_permlanex16: {
5020 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5021 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5022 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5027 case Intrinsic::amdgcn_permlane_bcast:
5028 case Intrinsic::amdgcn_permlane_up:
5029 case Intrinsic::amdgcn_permlane_down:
5030 case Intrinsic::amdgcn_permlane_xor: {
5032 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5033 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5038 case Intrinsic::amdgcn_permlane_idx_gen: {
5040 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5041 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5045 case Intrinsic::amdgcn_permlane16_var:
5046 case Intrinsic::amdgcn_permlanex16_var: {
5048 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5049 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5050 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5051 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5054 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
5055 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
5056 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
5057 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
5058 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
5059 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
5060 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
5061 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
5062 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
5063 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
5064 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
5065 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
5066 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
5067 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
5068 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
5069 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
5070 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
5071 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
5072 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
5073 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
5074 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
5075 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
5076 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
5077 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
5078 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
5079 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
5080 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
5081 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
5082 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
5083 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
5084 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
5085 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
5086 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
5087 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
5088 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
5089 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
5090 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
5091 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
5092 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8:
5093 case Intrinsic::amdgcn_mfma_f32_16x16x32_f16:
5094 case Intrinsic::amdgcn_mfma_f32_32x32x16_f16:
5095 case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
5096 case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
5097 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
5099 unsigned MinNumRegsRequired = DstSize / 32;
5109 bool UseAGPRForm = !
Subtarget.hasGFX90AInsts() ||
5110 Info->selectAGPRFormMFMA(MinNumRegsRequired);
5122 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
5123 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
5125 unsigned MinNumRegsRequired = DstSize / 32;
5144 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
5145 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
5146 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
5147 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
5148 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
5149 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
5150 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
5151 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
5152 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
5153 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
5154 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
5155 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
5156 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
5157 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
5158 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
5159 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
5160 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
5161 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
5162 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
5163 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
5164 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
5165 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
5166 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
5167 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
5168 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
5169 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
5170 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
5171 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
5174 unsigned MinNumRegsRequired = DstSize / 32;
5190 case Intrinsic::amdgcn_interp_p1:
5191 case Intrinsic::amdgcn_interp_p2:
5192 case Intrinsic::amdgcn_interp_mov:
5193 case Intrinsic::amdgcn_interp_p1_f16:
5194 case Intrinsic::amdgcn_interp_p2_f16:
5195 case Intrinsic::amdgcn_lds_param_load: {
5196 const int M0Idx =
MI.getNumOperands() - 1;
5197 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5198 unsigned M0Bank =
getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
5201 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5202 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5203 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5207 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5210 case Intrinsic::amdgcn_interp_inreg_p10:
5211 case Intrinsic::amdgcn_interp_inreg_p2:
5212 case Intrinsic::amdgcn_interp_inreg_p10_f16:
5213 case Intrinsic::amdgcn_interp_inreg_p2_f16:
5214 case Intrinsic::amdgcn_interp_p10_rtz_f16:
5215 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
5217 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5218 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5219 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5220 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5223 case Intrinsic::amdgcn_permlane16_swap:
5224 case Intrinsic::amdgcn_permlane32_swap: {
5226 OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] =
5227 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5230 case Intrinsic::amdgcn_ballot: {
5233 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5234 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
5237 case Intrinsic::amdgcn_inverse_ballot: {
5239 Register MaskReg =
MI.getOperand(2).getReg();
5241 unsigned MaskBank =
getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
5242 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5243 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5246 case Intrinsic::amdgcn_bitop3: {
5248 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5249 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5250 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5251 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5254 case Intrinsic::amdgcn_s_quadmask:
5255 case Intrinsic::amdgcn_s_wqm: {
5256 Register MaskReg =
MI.getOperand(2).getReg();
5258 unsigned MaskBank =
getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
5259 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
5260 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5263 case Intrinsic::amdgcn_wave_reduce_add:
5264 case Intrinsic::amdgcn_wave_reduce_fadd:
5265 case Intrinsic::amdgcn_wave_reduce_sub:
5266 case Intrinsic::amdgcn_wave_reduce_fsub:
5267 case Intrinsic::amdgcn_wave_reduce_min:
5268 case Intrinsic::amdgcn_wave_reduce_umin:
5269 case Intrinsic::amdgcn_wave_reduce_fmin:
5270 case Intrinsic::amdgcn_wave_reduce_max:
5271 case Intrinsic::amdgcn_wave_reduce_umax:
5272 case Intrinsic::amdgcn_wave_reduce_fmax:
5273 case Intrinsic::amdgcn_wave_reduce_and:
5274 case Intrinsic::amdgcn_wave_reduce_or:
5275 case Intrinsic::amdgcn_wave_reduce_xor: {
5277 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5281 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
5284 case Intrinsic::amdgcn_s_bitreplicate: {
5285 Register MaskReg =
MI.getOperand(2).getReg();
5286 unsigned MaskBank =
getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
5287 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5288 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
5291 case Intrinsic::amdgcn_wave_shuffle: {
5293 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
5294 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
5295 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
5301 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
5302 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
5303 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
5304 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
5305 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
5308 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
5315 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
5316 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
5317 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5319 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
5320 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
5321 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
5322 unsigned LastRegOpIdx =
MI.getNumExplicitOperands() - 1 - NumMods;
5324 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5326 OpdsMapping[1] = AMDGPU::getValueMapping(
5327 AMDGPU::VGPRRegBankID,
5329 OpdsMapping[2] = AMDGPU::getValueMapping(
5330 AMDGPU::VGPRRegBankID,
5333 OpdsMapping[LastRegOpIdx] =
5335 if (LastRegOpIdx == 3) {
5340 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5343 unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
5344 for (
unsigned I = FirstSrcOpIdx;
I < LastRegOpIdx; ++
I) {
5346 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5351 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
5352 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
5355 case Intrinsic::amdgcn_s_getreg:
5356 case Intrinsic::amdgcn_s_memtime:
5357 case Intrinsic::amdgcn_s_memrealtime:
5358 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
5359 case Intrinsic::amdgcn_s_sendmsg_rtn: {
5361 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5364 case Intrinsic::amdgcn_global_atomic_fmin_num:
5365 case Intrinsic::amdgcn_global_atomic_fmax_num:
5366 case Intrinsic::amdgcn_flat_atomic_fmin_num:
5367 case Intrinsic::amdgcn_flat_atomic_fmax_num:
5368 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
5369 case Intrinsic::amdgcn_global_load_tr_b64:
5370 case Intrinsic::amdgcn_global_load_tr_b128:
5371 case Intrinsic::amdgcn_global_load_tr4_b64:
5372 case Intrinsic::amdgcn_global_load_tr6_b96:
5373 case Intrinsic::amdgcn_ds_load_tr8_b64:
5374 case Intrinsic::amdgcn_ds_load_tr16_b128:
5375 case Intrinsic::amdgcn_ds_load_tr4_b64:
5376 case Intrinsic::amdgcn_ds_load_tr6_b96:
5377 case Intrinsic::amdgcn_ds_read_tr4_b64:
5378 case Intrinsic::amdgcn_ds_read_tr6_b96:
5379 case Intrinsic::amdgcn_ds_read_tr8_b64:
5380 case Intrinsic::amdgcn_ds_read_tr16_b64:
5381 case Intrinsic::amdgcn_ds_atomic_async_barrier_arrive_b64:
5382 case Intrinsic::amdgcn_ds_atomic_barrier_arrive_rtn_b64:
5384 case Intrinsic::amdgcn_ds_ordered_add:
5385 case Intrinsic::amdgcn_ds_ordered_swap: {
5387 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5389 AMDGPU::SGPRRegBankID);
5390 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
5391 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5394 case Intrinsic::amdgcn_ds_append:
5395 case Intrinsic::amdgcn_ds_consume: {
5397 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5401 case Intrinsic::amdgcn_exp_compr:
5402 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5403 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5405 case Intrinsic::amdgcn_exp:
5407 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5408 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5409 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5410 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5412 case Intrinsic::amdgcn_exp_row:
5413 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5414 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5415 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5416 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5419 case Intrinsic::amdgcn_s_alloc_vgpr:
5420 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1);
5421 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
5423 case Intrinsic::amdgcn_s_sendmsg:
5424 case Intrinsic::amdgcn_s_sendmsghalt: {
5427 AMDGPU::SGPRRegBankID);
5428 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5431 case Intrinsic::amdgcn_s_setreg: {
5434 AMDGPU::SGPRRegBankID);
5435 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5438 case Intrinsic::amdgcn_s_ttracedata: {
5441 getRegBankID(
MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID);
5442 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5445 case Intrinsic::amdgcn_end_cf: {
5447 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5450 case Intrinsic::amdgcn_else: {
5452 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5453 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5454 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5457 case Intrinsic::amdgcn_init_whole_wave:
5458 case Intrinsic::amdgcn_live_mask: {
5459 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5462 case Intrinsic::amdgcn_wqm_demote:
5463 case Intrinsic::amdgcn_kill: {
5464 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5467 case Intrinsic::amdgcn_raw_buffer_load:
5468 case Intrinsic::amdgcn_raw_ptr_buffer_load:
5469 case Intrinsic::amdgcn_raw_atomic_buffer_load:
5470 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
5471 case Intrinsic::amdgcn_raw_tbuffer_load:
5472 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
5481 case Intrinsic::amdgcn_raw_buffer_load_lds:
5482 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
5483 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
5484 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds: {
5491 case Intrinsic::amdgcn_raw_buffer_store:
5492 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5493 case Intrinsic::amdgcn_raw_buffer_store_format:
5494 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5495 case Intrinsic::amdgcn_raw_tbuffer_store:
5496 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5503 case Intrinsic::amdgcn_struct_buffer_load:
5504 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5505 case Intrinsic::amdgcn_struct_tbuffer_load:
5506 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5507 case Intrinsic::amdgcn_struct_atomic_buffer_load:
5508 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
5516 case Intrinsic::amdgcn_struct_buffer_load_lds:
5517 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
5518 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
5519 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: {
5527 case Intrinsic::amdgcn_struct_buffer_store:
5528 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5529 case Intrinsic::amdgcn_struct_tbuffer_store:
5530 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5538 case Intrinsic::amdgcn_init_exec_from_input: {
5540 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5543 case Intrinsic::amdgcn_ds_gws_init:
5544 case Intrinsic::amdgcn_ds_gws_barrier:
5545 case Intrinsic::amdgcn_ds_gws_sema_br: {
5546 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5550 AMDGPU::SGPRRegBankID);
5551 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5554 case Intrinsic::amdgcn_ds_gws_sema_v:
5555 case Intrinsic::amdgcn_ds_gws_sema_p:
5556 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5559 AMDGPU::SGPRRegBankID);
5560 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5563 case Intrinsic::amdgcn_cluster_load_b32:
5564 case Intrinsic::amdgcn_cluster_load_b64:
5565 case Intrinsic::amdgcn_cluster_load_b128: {
5569 getRegBankID(
MI.getOperand(4).getReg(), MRI, AMDGPU::SGPRRegBankID);
5570 OpdsMapping[4] = AMDGPU::getValueMapping(M0Bank, 32);
5573 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:
5574 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
5575 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
5576 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
5581 getRegBankID(
MI.getOperand(5).getReg(), MRI, AMDGPU::SGPRRegBankID);
5582 OpdsMapping[5] = AMDGPU::getValueMapping(M0Bank, 32);
5585 case Intrinsic::amdgcn_global_store_async_from_lds_b8:
5586 case Intrinsic::amdgcn_global_store_async_from_lds_b32:
5587 case Intrinsic::amdgcn_global_store_async_from_lds_b64:
5588 case Intrinsic::amdgcn_global_store_async_from_lds_b128:
5589 case Intrinsic::amdgcn_global_load_async_to_lds_b8:
5590 case Intrinsic::amdgcn_global_load_async_to_lds_b32:
5591 case Intrinsic::amdgcn_global_load_async_to_lds_b64:
5592 case Intrinsic::amdgcn_global_load_async_to_lds_b128: {
5598 case Intrinsic::amdgcn_load_to_lds:
5599 case Intrinsic::amdgcn_load_async_to_lds:
5600 case Intrinsic::amdgcn_global_load_lds:
5601 case Intrinsic::amdgcn_global_load_async_lds: {
5607 case Intrinsic::amdgcn_lds_direct_load: {
5608 const int M0Idx =
MI.getNumOperands() - 1;
5609 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5610 unsigned M0Bank =
getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
5613 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5614 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5615 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5619 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5622 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5623 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5627 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
5628 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
5629 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
5630 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn: {
5643 case Intrinsic::amdgcn_s_sleep_var:
5646 case Intrinsic::amdgcn_s_barrier_join:
5647 case Intrinsic::amdgcn_s_wakeup_barrier:
5650 case Intrinsic::amdgcn_s_barrier_init:
5651 case Intrinsic::amdgcn_s_barrier_signal_var:
5655 case Intrinsic::amdgcn_s_barrier_signal_isfirst: {
5656 const unsigned ResultSize = 1;
5658 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5661 case Intrinsic::amdgcn_s_get_barrier_state:
5662 case Intrinsic::amdgcn_s_get_named_barrier_state: {
5667 case Intrinsic::amdgcn_pops_exiting_wave_id:
5669 case Intrinsic::amdgcn_tensor_load_to_lds:
5670 case Intrinsic::amdgcn_tensor_store_from_lds: {
5673 for (
unsigned I = 1;
I <
MI.getNumOperands(); ++
I) {
5674 if (
MI.getOperand(
I).isReg()) {
5678 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5683 case Intrinsic::amdgcn_s_prefetch_data:
5684 case Intrinsic::amdgcn_s_prefetch_inst: {
5689 case Intrinsic::amdgcn_flat_prefetch:
5690 case Intrinsic::amdgcn_global_prefetch:
5697 case AMDGPU::G_SELECT: {
5700 AMDGPU::SGPRRegBankID);
5702 AMDGPU::SGPRRegBankID);
5703 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5704 Op3Bank == AMDGPU::SGPRRegBankID;
5706 unsigned CondBankDefault = SGPRSrcs ?
5707 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5710 if (CondBank == AMDGPU::SGPRRegBankID)
5711 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5712 else if (CondBank == AMDGPU::VGPRRegBankID)
5713 CondBank = AMDGPU::VCCRegBankID;
5715 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5716 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5718 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5722 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5723 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5724 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5725 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5727 OpdsMapping[0] = AMDGPU::getValueMapping(Bank,
Size);
5728 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5729 OpdsMapping[2] = AMDGPU::getValueMapping(Bank,
Size);
5730 OpdsMapping[3] = AMDGPU::getValueMapping(Bank,
Size);
5736 case AMDGPU::G_SI_CALL: {
5737 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5743 for (
unsigned I = 4;
I <
MI.getNumOperands(); ++
I) {
5744 if (
MI.getOperand(
I).isReg()) {
5748 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5753 case AMDGPU::G_LOAD:
5754 case AMDGPU::G_ZEXTLOAD:
5755 case AMDGPU::G_SEXTLOAD:
5758 case AMDGPU::G_ATOMICRMW_XCHG:
5759 case AMDGPU::G_ATOMICRMW_ADD:
5760 case AMDGPU::G_ATOMICRMW_SUB:
5761 case AMDGPU::G_ATOMICRMW_AND:
5762 case AMDGPU::G_ATOMICRMW_OR:
5763 case AMDGPU::G_ATOMICRMW_XOR:
5764 case AMDGPU::G_ATOMICRMW_MAX:
5765 case AMDGPU::G_ATOMICRMW_MIN:
5766 case AMDGPU::G_ATOMICRMW_UMAX:
5767 case AMDGPU::G_ATOMICRMW_UMIN:
5768 case AMDGPU::G_ATOMICRMW_FADD:
5769 case AMDGPU::G_ATOMICRMW_FMIN:
5770 case AMDGPU::G_ATOMICRMW_FMAX:
5771 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5772 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5773 case AMDGPU::G_ATOMICRMW_USUB_COND:
5774 case AMDGPU::G_ATOMICRMW_USUB_SAT:
5775 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
5781 case AMDGPU::G_ATOMIC_CMPXCHG: {
5788 case AMDGPU::G_BRCOND: {
5790 AMDGPU::SGPRRegBankID);
5792 if (Bank != AMDGPU::SGPRRegBankID)
5793 Bank = AMDGPU::VCCRegBankID;
5795 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
5798 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
5800 case AMDGPU::G_PREFETCH:
5803 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP:
5804 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN:
5805 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5807 case AMDGPU::G_AMDGPU_FLAT_LOAD_MONITOR:
5808 case AMDGPU::G_AMDGPU_GLOBAL_LOAD_MONITOR: {
5811 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5812 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
5819 MI.getNumOperands());