29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(),
Ops),
118 SDValue Idx = In.getOperand(1);
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
136 assert(Elts.
size() == SubRegClass.
size() &&
"array size mismatch");
137 unsigned NumElts = Elts.
size();
140 for (
unsigned i = 0; i < NumElts; ++i) {
141 Ops[2 * i + 1] = Elts[i];
151 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
155#ifdef EXPENSIVE_CHECKS
160 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
181bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
215 case AMDGPUISD::FRACT:
216 case AMDGPUISD::CLAMP:
217 case AMDGPUISD::COS_HW:
218 case AMDGPUISD::SIN_HW:
219 case AMDGPUISD::FMIN3:
220 case AMDGPUISD::FMAX3:
221 case AMDGPUISD::FMED3:
222 case AMDGPUISD::FMAD_FTZ:
225 case AMDGPUISD::RCP_IFLAG:
235 case AMDGPUISD::DIV_FIXUP:
245#ifdef EXPENSIVE_CHECKS
249 assert(L->isLCSSAForm(DT));
257#ifdef EXPENSIVE_CHECKS
265 assert(Subtarget->d16PreservesUnusedBits());
266 MVT VT =
N->getValueType(0).getSimpleVT();
267 if (VT != MVT::v2i16 && VT != MVT::v2f16)
289 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
292 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
298 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
311 if (LdLo &&
Lo.hasOneUse()) {
317 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
320 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
332 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
345 if (!Subtarget->d16PreservesUnusedBits())
350 bool MadeChange =
false;
351 while (Position !=
CurDAG->allnodes_begin()) {
356 switch (
N->getOpcode()) {
367 CurDAG->RemoveDeadNodes();
373bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
379 return TII->isInlineConstant(
C->getAPIntValue());
382 return TII->isInlineConstant(
C->getValueAPF());
392 unsigned OpNo)
const {
393 if (!
N->isMachineOpcode()) {
396 if (
Reg.isVirtual()) {
401 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
402 return TRI->getPhysRegBaseClass(
Reg);
408 switch (
N->getMachineOpcode()) {
410 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
411 const MCInstrDesc &
Desc =
TII->get(
N->getMachineOpcode());
412 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
416 int16_t RegClass =
TII->getOpRegClassID(
Desc.operands()[
OpIdx]);
420 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
422 case AMDGPU::REG_SEQUENCE: {
423 unsigned RCID =
N->getConstantOperandVal(0);
424 const TargetRegisterClass *SuperRC =
425 Subtarget->getRegisterInfo()->getRegClass(RCID);
427 SDValue SubRegOp =
N->getOperand(OpNo + 1);
429 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
438 Ops.push_back(NewChain);
439 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
440 Ops.push_back(
N->getOperand(i));
443 return CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(),
Ops);
450 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
453 return glueCopyToOp(
N,
M0,
M0.getValue(1));
456SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
459 if (Subtarget->ldsRequiresM0Init())
461 N,
CurDAG->getSignedTargetConstant(-1, SDLoc(
N), MVT::i32));
463 MachineFunction &
MF =
CurDAG->getMachineFunction();
464 unsigned Value =
MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
466 glueCopyToM0(
N,
CurDAG->getTargetConstant(
Value, SDLoc(
N), MVT::i32));
473 SDNode *
Lo =
CurDAG->getMachineNode(
474 AMDGPU::S_MOV_B32,
DL, MVT::i32,
476 SDNode *
Hi =
CurDAG->getMachineNode(
477 AMDGPU::S_MOV_B32,
DL, MVT::i32,
480 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
484 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, VT,
Ops);
487SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(
const SDNode *
N,
492 uint32_t LHSVal, RHSVal;
496 uint32_t
K = (LHSVal & 0xffff) | (RHSVal << 16);
498 isVGPRImm(
N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
506 EVT VT =
N->getValueType(0);
510 SDValue RegClass =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
512 if (NumVectorElts == 1) {
513 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0),
518 bool IsGCN =
CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
519 if (IsGCN && Subtarget->has64BitLiterals() && VT.
getSizeInBits() == 64 &&
522 bool AllConst =
true;
524 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
532 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
535 C |= Val << (EltSize *
I);
540 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
DL, VT, CV);
541 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, VT,
SDValue(Copy, 0),
547 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
554 RegSeqArgs[0] =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
555 bool IsRegSeq =
true;
556 unsigned NOps =
N->getNumOperands();
558 assert(IsGCN || EltSizeInRegs == 1);
559 for (
unsigned i = 0; i < NOps; i++) {
566 i * EltSizeInRegs, EltSizeInRegs)
568 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
569 RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(
Sub,
DL, MVT::i32);
571 if (NOps != NumVectorElts) {
576 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
578 i * EltSizeInRegs, EltSizeInRegs)
580 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
581 RegSeqArgs[1 + (2 * i) + 1] =
588 CurDAG->SelectNodeTo(
N, AMDGPU::REG_SEQUENCE,
N->getVTList(), RegSeqArgs);
592 EVT VT =
N->getValueType(0);
596 if (!Subtarget->hasPkMovB32() || !EltVT.
bitsEq(MVT::i32) ||
610 Mask[0] < 4 && Mask[1] < 4);
612 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
613 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
614 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
615 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
618 Src0SubReg = Src1SubReg;
620 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
625 Src1SubReg = Src0SubReg;
627 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
637 if (
N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
638 Src1SubReg == AMDGPU::sub0) {
654 SDValue Src0OpSelVal =
CurDAG->getTargetConstant(Src0OpSel,
DL, MVT::i32);
655 SDValue Src1OpSelVal =
CurDAG->getTargetConstant(Src1OpSel,
DL, MVT::i32);
658 CurDAG->SelectNodeTo(
N, AMDGPU::V_PK_MOV_B32,
N->getVTList(),
659 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
669 CurDAG->getTargetExtractSubreg(Src0SubReg,
DL, EltVT, VSrc0);
671 CurDAG->getTargetExtractSubreg(Src1SubReg,
DL, EltVT, VSrc1);
674 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
675 ResultElt0,
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32),
676 ResultElt1,
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32)};
677 CurDAG->SelectNodeTo(
N, TargetOpcode::REG_SEQUENCE, VT,
Ops);
681 unsigned int Opc =
N->getOpcode();
682 if (
N->isMachineOpcode()) {
690 N = glueCopyToM0LDSInit(
N);
700 if (
N->getValueType(0) == MVT::i64) {
701 SelectAddcSubbI64(
N);
705 if (
N->getValueType(0) != MVT::i32)
712 if (
N->getValueType(0) == MVT::i64) {
713 SelectAddcSubbI64(
N);
717 SelectUADDO_USUBO(
N);
720 case AMDGPUISD::FMUL_W_CHAIN: {
721 SelectFMUL_W_CHAIN(
N);
724 case AMDGPUISD::FMA_W_CHAIN: {
725 SelectFMA_W_CHAIN(
N);
731 EVT VT =
N->getValueType(0);
749 N->isDivergent() ?
TRI->getDefaultVectorSuperClassForBitWidth(VecInBits)
761 if (
N->getValueType(0) == MVT::i128) {
762 RC =
CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID,
DL, MVT::i32);
763 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0_sub1,
DL, MVT::i32);
764 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub2_sub3,
DL, MVT::i32);
765 }
else if (
N->getValueType(0) == MVT::i64) {
766 RC =
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32);
767 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
768 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
772 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
773 N->getOperand(1), SubReg1 };
775 N->getValueType(0),
Ops));
781 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N) ||
782 Subtarget->has64BitLiterals())
787 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
792 Imm =
C->getZExtValue();
801 case AMDGPUISD::BFE_I32:
802 case AMDGPUISD::BFE_U32: {
828 case AMDGPUISD::DIV_SCALE: {
839 return SelectMUL_LOHI(
N);
850 if (
N->getValueType(0) != MVT::i32)
861 case AMDGPUISD::CVT_PKRTZ_F16_F32:
862 case AMDGPUISD::CVT_PKNORM_I16_F32:
863 case AMDGPUISD::CVT_PKNORM_U16_F32:
864 case AMDGPUISD::CVT_PK_U16_U32:
865 case AMDGPUISD::CVT_PK_I16_I32: {
867 if (
N->getValueType(0) == MVT::i32) {
868 MVT NewVT =
Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
870 { N->getOperand(0), N->getOperand(1) });
878 SelectINTRINSIC_W_CHAIN(
N);
882 SelectINTRINSIC_WO_CHAIN(
N);
886 SelectINTRINSIC_VOID(
N);
890 SelectWAVE_ADDRESS(
N);
894 SelectSTACKRESTORE(
N);
903 if (!Subtarget->hasSDWA())
913 return RHS->getZExtValue() == 0xFF || RHS->getZExtValue() == 0xFFFF;
917 return (RHS->getZExtValue() % 8) == 0;
922bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
925 return Term->getMetadata(
"amdgpu.uniform") ||
926 Term->getMetadata(
"structurizecfg.uniform");
929bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
930 unsigned ShAmtBits)
const {
933 const APInt &
RHS =
N->getConstantOperandAPInt(1);
934 if (
RHS.countr_one() >= ShAmtBits)
964 N1 =
Lo.getOperand(1);
974 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
989 return "AMDGPU DAG->DAG Pattern Instruction Selection";
999#ifdef EXPENSIVE_CHECKS
1005 for (
auto &L : LI.getLoopsInPreorder())
1006 assert(L->isLCSSAForm(DT) &&
"Loop is not in LCSSA form!");
1028 }
else if ((Addr.
getOpcode() == AMDGPUISD::DWORDADDR) &&
1030 Base =
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1044SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1046 SDNode *Mov =
CurDAG->getMachineNode(
1047 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1048 CurDAG->getTargetConstant(Val,
DL, MVT::i32));
1052void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
1057 if (
N->isDivergent()) {
1059 : AMDGPU::V_SUBB_U32_e64;
1061 N,
Opc,
N->getVTList(),
1063 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1066 : AMDGPU::S_SUB_CO_PSEUDO;
1067 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(), {LHS, RHS, CI});
1071void AMDGPUDAGToDAGISel::SelectAddcSubbI64(
SDNode *
N) {
1076 unsigned Opcode =
N->getOpcode();
1083 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1084 MVT::i32,
LHS, Sub0);
1085 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1086 MVT::i32,
LHS, Sub1);
1088 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1089 MVT::i32,
RHS, Sub0);
1090 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1091 MVT::i32,
RHS, Sub1);
1093 SDVTList VTList =
CurDAG->getVTList(MVT::i32,
N->getValueType(1));
1095 static const unsigned NoCarryOpcMap[2][2] = {
1096 {AMDGPU::S_USUBO_PSEUDO, AMDGPU::S_UADDO_PSEUDO},
1097 {AMDGPU::V_SUB_CO_U32_e64, AMDGPU::V_ADD_CO_U32_e64}};
1098 static const unsigned CarryOpcMap[2][2] = {
1099 {AMDGPU::S_SUB_CO_PSEUDO, AMDGPU::S_ADD_CO_PSEUDO},
1100 {AMDGPU::V_SUBB_U32_e64, AMDGPU::V_ADDC_U32_e64}};
1102 bool IsVALU =
N->isDivergent();
1104 unsigned NoCarryOpc = NoCarryOpcMap[IsVALU][IsAdd];
1105 unsigned CarryOpc = CarryOpcMap[IsVALU][IsAdd];
1109 if (!ConsumeCarry) {
1112 AddLo =
CurDAG->getMachineNode(NoCarryOpc,
DL, VTList, Args);
1115 AddLo =
CurDAG->getMachineNode(NoCarryOpc,
DL, VTList, Args);
1121 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1124 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1132 AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1135 AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1138 unsigned RC = IsVALU ? AMDGPU::VReg_64RegClassID : AMDGPU::SReg_64RegClassID;
1139 SDValue RegSequenceArgs[] = {
CurDAG->getTargetConstant(RC,
DL, MVT::i32),
1143 MVT::i64, RegSequenceArgs);
1149void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
1154 bool IsVALU =
N->isDivergent();
1156 for (SDNode::user_iterator UI =
N->user_begin(),
E =
N->user_end(); UI !=
E;
1158 if (UI.getUse().getResNo() == 1) {
1159 if (UI->isMachineOpcode()) {
1160 if (UI->getMachineOpcode() !=
1161 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1174 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1177 N,
Opc,
N->getVTList(),
1178 {N->getOperand(0), N->getOperand(1),
1179 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1181 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1183 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
1184 {N->getOperand(0), N->getOperand(1)});
1188void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
1192 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1193 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1194 SelectVOP3Mods(
N->getOperand(3),
Ops[5],
Ops[4]);
1195 Ops[8] =
N->getOperand(0);
1196 Ops[9] =
N->getOperand(4);
1200 bool UseFMAC = Subtarget->hasDLInsts() &&
1204 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1205 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(),
Ops);
1208void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
1212 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[4],
Ops[5]);
1213 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1214 Ops[6] =
N->getOperand(0);
1215 Ops[7] =
N->getOperand(3);
1217 CurDAG->SelectNodeTo(
N, AMDGPU::V_MUL_F32_e64,
N->getVTList(),
Ops);
1222void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
1223 EVT VT =
N->getValueType(0);
1225 assert(VT == MVT::f32 || VT == MVT::f64);
1228 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1233 SelectVOP3BMods0(
N->getOperand(0),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1234 SelectVOP3BMods(
N->getOperand(1),
Ops[3],
Ops[2]);
1235 SelectVOP3BMods(
N->getOperand(2),
Ops[5],
Ops[4]);
1241void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1245 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() && !
N->hasAnyUseOfValue(1);
1246 if (Subtarget->hasMADIntraFwdBug())
1247 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1248 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1249 else if (UseNoCarry)
1250 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1252 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1255 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1259 MachineSDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, MVT::i64,
Ops);
1270void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1275 if (Subtarget->hasMadNC64_32Insts()) {
1276 VTList =
CurDAG->getVTList(MVT::i64);
1277 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1279 VTList =
CurDAG->getVTList(MVT::i64, MVT::i1);
1280 if (Subtarget->hasMADIntraFwdBug()) {
1281 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1282 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1284 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1291 SDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, VTList,
Ops);
1293 SDValue Sub0 =
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1294 SDNode *
Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1295 MVT::i32,
SDValue(Mad, 0), Sub0);
1299 SDValue Sub1 =
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1300 SDNode *
Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1301 MVT::i32,
SDValue(Mad, 0), Sub1);
1311 if (!
Base || Subtarget->hasUsableDSOffset() ||
1312 Subtarget->unsafeDSOffsetFoldingEnabled())
1323 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1336 int64_t ByteOffset =
C->getSExtValue();
1337 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1346 if (isDSOffsetLegal(
Sub, ByteOffset)) {
1352 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1353 if (Subtarget->hasAddNoCarryInsts()) {
1354 SubOp = AMDGPU::V_SUB_U32_e64;
1356 CurDAG->getTargetConstant(0, {}, MVT::i1));
1359 MachineSDNode *MachineSub =
1360 CurDAG->getMachineNode(SubOp,
DL, MVT::i32, Opnds);
1376 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1378 MachineSDNode *MovZero =
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1379 DL, MVT::i32, Zero);
1381 Offset =
CurDAG->getTargetConstant(CAddr->getZExtValue(),
DL, MVT::i16);
1388 Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1392bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1394 unsigned Size)
const {
1395 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1400 if (!
Base || Subtarget->hasUsableDSOffset() ||
1401 Subtarget->unsafeDSOffsetFoldingEnabled())
1419bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1425 if (Subtarget->hasSignedScratchOffsets())
1435 ConstantSDNode *ImmOp =
nullptr;
1446bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1452 if (Subtarget->hasSignedScratchOffsets())
1462bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1476 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1479 auto LHS =
Base.getOperand(0);
1480 auto RHS =
Base.getOperand(1);
1488 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 4);
1494 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 8);
1499 unsigned Size)
const {
1502 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1507 unsigned OffsetValue1 = OffsetValue0 +
Size;
1510 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1512 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1513 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1518 if (
const ConstantSDNode *
C =
1520 unsigned OffsetValue0 =
C->getZExtValue();
1521 unsigned OffsetValue1 = OffsetValue0 +
Size;
1523 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1533 if (isDSOffset2Legal(
Sub, OffsetValue0, OffsetValue1,
Size)) {
1537 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1538 if (Subtarget->hasAddNoCarryInsts()) {
1539 SubOp = AMDGPU::V_SUB_U32_e64;
1541 CurDAG->getTargetConstant(0, {}, MVT::i1));
1544 MachineSDNode *MachineSub =
CurDAG->getMachineNode(
1549 CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1551 CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1557 unsigned OffsetValue0 = CAddr->getZExtValue();
1558 unsigned OffsetValue1 = OffsetValue0 +
Size;
1560 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1562 MachineSDNode *MovZero =
1563 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
1565 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1566 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1574 Offset0 =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1575 Offset1 =
CurDAG->getTargetConstant(1,
DL, MVT::i32);
1585 if (Subtarget->useFlatForGlobal())
1590 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1591 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1592 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1593 SOffset = Subtarget->hasRestrictedSOffset()
1594 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1595 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1597 ConstantSDNode *C1 =
nullptr;
1599 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1612 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1618 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1634 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1636 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1640 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1650 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1661 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1667bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1670 SDValue Ptr, Offen, Idxen, Addr64;
1674 if (!Subtarget->hasAddr64())
1677 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1681 if (
C->getSExtValue()) {
1694std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1699 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1705 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
1708bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1714 MachineFunction &
MF =
CurDAG->getMachineFunction();
1715 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1717 Rsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1720 int64_t
Imm = CAddr->getSExtValue();
1721 const int64_t NullPtr =
1724 if (Imm != NullPtr) {
1727 CurDAG->getTargetConstant(Imm & ~MaxOffset,
DL, MVT::i32);
1728 MachineSDNode *MovHighBits =
CurDAG->getMachineNode(
1729 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1730 VAddr =
SDValue(MovHighBits, 0);
1732 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1733 ImmOffset =
CurDAG->getTargetConstant(Imm & MaxOffset,
DL, MVT::i32);
1738 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1759 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1760 if (
TII->isLegalMUBUFImmOffset(C1) &&
1761 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1762 CurDAG->SignBitIsZero(N0))) {
1763 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1764 ImmOffset =
CurDAG->getTargetConstant(C1,
DL, MVT::i32);
1770 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1771 ImmOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1779 if (!
Reg.isPhysical())
1781 const auto *RC =
TRI.getPhysRegBaseClass(
Reg);
1782 return RC &&
TRI.isSGPRClass(RC);
1785bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1790 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
1791 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1792 MachineFunction &
MF =
CurDAG->getMachineFunction();
1793 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1798 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1804 ConstantSDNode *CAddr;
1817 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1822 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1828bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(
SDValue Addr,
SDValue &SRsrc,
1831 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1832 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1834 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1840 uint64_t Rsrc =
TII->getDefaultRsrcDataFormat() |
1853bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1855 if (Subtarget->hasRestrictedSOffset() &&
isNullConstant(ByteOffsetNode)) {
1856 SOffset =
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1860 SOffset = ByteOffsetNode;
1878bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
1882 int64_t OffsetVal = 0;
1886 bool CanHaveFlatSegmentOffsetBug =
1887 Subtarget->hasFlatSegmentOffsetBug() &&
1888 FlatVariant == FlatAddrSpace::FLAT &&
1891 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1893 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1894 (FlatVariant != FlatAddrSpace::FlatScratch ||
1895 isFlatScratchBaseLegal(Addr))) {
1903 if (COffsetVal == 0 || FlatVariant != FlatAddrSpace::FLAT || IsInBounds) {
1904 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1905 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1907 OffsetVal = COffsetVal;
1920 uint64_t RemainderOffset;
1922 std::tie(OffsetVal, RemainderOffset) =
1923 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1926 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1933 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1934 if (Subtarget->hasAddNoCarryInsts()) {
1935 AddOp = AMDGPU::V_ADD_U32_e64;
1944 CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
1946 CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
1948 SDNode *N0Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1949 DL, MVT::i32, N0, Sub0);
1950 SDNode *N0Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1951 DL, MVT::i32, N0, Sub1);
1954 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1956 SDVTList VTs =
CurDAG->getVTList(MVT::i32, MVT::i1);
1959 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64,
DL, VTs,
1960 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1962 SDNode *Addc =
CurDAG->getMachineNode(
1963 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1967 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID,
DL,
1972 MVT::i64, RegSequenceArgs),
1981 Offset =
CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1985bool AMDGPUDAGToDAGISel::SelectFlatOffset(
SDNode *
N,
SDValue Addr,
1988 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1992bool AMDGPUDAGToDAGISel::SelectGlobalOffset(
SDNode *
N,
SDValue Addr,
1995 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1999bool AMDGPUDAGToDAGISel::SelectScratchOffset(
SDNode *
N,
SDValue Addr,
2002 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
2010 if (
Op.getValueType() == MVT::i32)
2025bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2028 bool NeedIOffset)
const {
2030 int64_t ImmOffset = 0;
2031 ScaleOffset =
false;
2037 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2039 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2043 FlatAddrSpace::FlatGlobal)) {
2045 ImmOffset = COffsetVal;
2046 }
else if (!
LHS->isDivergent()) {
2047 if (COffsetVal > 0) {
2052 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2054 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2058 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
2060 SDNode *VMov =
CurDAG->getMachineNode(
2061 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2062 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2065 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2075 unsigned NumLiterals =
2076 !
TII->isInlineConstant(APInt(32,
Lo_32(COffsetVal))) +
2077 !
TII->isInlineConstant(APInt(32,
Hi_32(COffsetVal)));
2078 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2087 if (!
LHS->isDivergent()) {
2090 ScaleOffset = SelectScaleOffset(
N,
RHS, Subtarget->hasSignedGVSOffset());
2092 RHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2099 if (!SAddr && !
RHS->isDivergent()) {
2101 ScaleOffset = SelectScaleOffset(
N,
LHS, Subtarget->hasSignedGVSOffset());
2103 LHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2110 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2115 if (Subtarget->hasScaleOffset() &&
2116 (Addr.
getOpcode() == (Subtarget->hasSignedGVSOffset()
2131 Offset =
CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2144 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2145 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2147 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2151bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2156 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2164bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(
SDNode *
N,
SDValue Addr,
2169 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2174 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2180bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(
SDNode *
N,
SDValue Addr,
2186 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2191 N->getConstantOperandVal(
N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2197bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(
SDNode *
N,
SDValue Addr,
2202 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2206 CPol =
CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2210bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(
SDNode *
N,
SDValue Addr,
2216 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2222 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2228bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(
SDNode *
N,
SDValue Addr,
2234 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2255 FI->getValueType(0));
2265bool AMDGPUDAGToDAGISel::SelectScratchSAddr(
SDNode *Parent,
SDValue Addr,
2274 int64_t COffsetVal = 0;
2276 if (
CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2285 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2288 FlatAddrSpace::FlatScratch)) {
2289 int64_t SplitImmOffset, RemainderOffset;
2290 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2293 COffsetVal = SplitImmOffset;
2297 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
2298 :
CurDAG->getSignedTargetConstant(RemainderOffset,
DL, MVT::i32);
2299 SAddr =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_ADD_I32,
DL, MVT::i32,
2304 Offset =
CurDAG->getSignedTargetConstant(COffsetVal,
DL, MVT::i32);
2310bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2312 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2318 KnownBits VKnown =
CurDAG->computeKnownBits(VAddr);
2325 return (VMax & 3) + (
SMax & 3) >= 4;
2328bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(
SDNode *
N,
SDValue Addr,
2332 int64_t ImmOffset = 0;
2336 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2338 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2343 ImmOffset = COffsetVal;
2344 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
2348 int64_t SplitImmOffset, RemainderOffset;
2349 std::tie(SplitImmOffset, RemainderOffset) =
2354 SDNode *VMov =
CurDAG->getMachineNode(
2355 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2356 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2359 if (!isFlatScratchBaseLegal(Addr))
2361 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2363 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2364 CPol =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2376 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
2379 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
2386 if (OrigAddr != Addr) {
2387 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2390 if (!isFlatScratchBaseLegalSV(OrigAddr))
2394 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2397 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2399 bool ScaleOffset = SelectScaleOffset(
N, VAddr,
true );
2408bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(
SDValue *SOffset,
2411 int64_t ImmOffset)
const {
2412 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2414 KnownBits SKnown =
CurDAG->computeKnownBits(*SOffset);
2426 bool IsSigned)
const {
2427 bool ScaleOffset =
false;
2428 if (!Subtarget->hasScaleOffset() || !
Offset)
2442 (IsSigned &&
Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2443 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2444 (
Offset.isMachineOpcode() &&
2445 Offset.getMachineOpcode() ==
2446 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2447 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2449 ScaleOffset =
C->getZExtValue() ==
Size;
2461bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDNode *
N,
SDValue ByteOffsetNode,
2463 bool Imm32Only,
bool IsBuffer,
2464 bool HasSOffset, int64_t ImmOffset,
2465 bool *ScaleOffset)
const {
2467 "Cannot match both soffset and offset at the same time!");
2472 *ScaleOffset = SelectScaleOffset(
N, ByteOffsetNode,
false );
2482 *SOffset = ByteOffsetNode;
2483 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2489 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2496 SDLoc SL(ByteOffsetNode);
2500 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2502 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2503 if (EncodedOffset &&
Offset && !Imm32Only) {
2504 *
Offset =
CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2513 if (EncodedOffset &&
Offset && Imm32Only) {
2514 *
Offset =
CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2522 SDValue C32Bit =
CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2524 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2531SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(
SDValue Addr)
const {
2538 const MachineFunction &
MF =
CurDAG->getMachineFunction();
2539 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
2540 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2541 SDValue AddrHi =
CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2544 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2546 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2547 SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2549 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2552 return SDValue(
CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2559bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(
SDNode *
N,
SDValue Addr,
2562 bool IsBuffer,
bool HasSOffset,
2564 bool *ScaleOffset)
const {
2566 assert(!Imm32Only && !IsBuffer);
2569 if (!SelectSMRDBaseOffset(
N, Addr,
B,
nullptr,
Offset,
false,
false,
true))
2574 ImmOff =
C->getSExtValue();
2576 return SelectSMRDBaseOffset(
N,
B, SBase, SOffset,
nullptr,
false,
false,
2577 true, ImmOff, ScaleOffset);
2597 if (SelectSMRDOffset(
N, N1, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2598 ImmOffset, ScaleOffset)) {
2602 if (SelectSMRDOffset(
N, N0, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2603 ImmOffset, ScaleOffset)) {
2612 bool Imm32Only,
bool *ScaleOffset)
const {
2613 if (SelectSMRDBaseOffset(
N, Addr, SBase, SOffset,
Offset, Imm32Only,
2616 SBase = Expand32BitAddress(SBase);
2621 SBase = Expand32BitAddress(Addr);
2622 *
Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2629bool AMDGPUDAGToDAGISel::SelectSMRDImm(
SDValue Addr,
SDValue &SBase,
2631 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2635bool AMDGPUDAGToDAGISel::SelectSMRDImm32(
SDValue Addr,
SDValue &SBase,
2638 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2645 if (!SelectSMRD(
N, Addr, SBase, &SOffset,
nullptr,
2646 false, &ScaleOffset))
2650 SDLoc(
N), MVT::i32);
2654bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(
SDNode *
N,
SDValue Addr,
2659 if (!SelectSMRD(
N, Addr, SBase, &SOffset, &
Offset,
false, &ScaleOffset))
2663 SDLoc(
N), MVT::i32);
2668 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2672bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2675 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2679bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2683 return N.getValueType() == MVT::i32 &&
2684 SelectSMRDBaseOffset(
nullptr,
N, SOffset,
2689bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2694 if (
CurDAG->isBaseWithConstantOffset(Index)) {
2719SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2723 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2727 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, Off, W);
2729 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2733 uint32_t PackedVal =
Offset | (Width << 16);
2734 SDValue PackedConst =
CurDAG->getTargetConstant(PackedVal,
DL, MVT::i32);
2736 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, PackedConst);
2739void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2744 const SDValue &Shl =
N->getOperand(0);
2749 uint32_t BVal =
B->getZExtValue();
2750 uint32_t CVal =
C->getZExtValue();
2752 if (0 < BVal && BVal <= CVal && CVal < 32) {
2762void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2763 switch (
N->getOpcode()) {
2765 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2768 const SDValue &Srl =
N->getOperand(0);
2772 if (Shift && Mask) {
2774 uint32_t MaskVal =
Mask->getZExtValue();
2786 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2793 if (Shift && Mask) {
2795 uint32_t MaskVal =
Mask->getZExtValue() >> ShiftVal;
2804 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2805 SelectS_BFEFromShifts(
N);
2810 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2811 SelectS_BFEFromShifts(
N);
2826 unsigned Width =
cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2836bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2838 if (!
N->hasOneUse())
2848 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2852 if (VT == MVT::i64) {
2855 Subtarget->hasScalarCompareEq64();
2858 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2891void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2894 if (
Cond.isUndef()) {
2895 CurDAG->SelectNodeTo(
N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2896 N->getOperand(2),
N->getOperand(0));
2900 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2902 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2903 bool AndExec = !UseSCCBr;
2904 bool Negate =
false;
2907 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2922 bool NegatedBallot =
false;
2925 UseSCCBr = !BallotCond->isDivergent();
2926 Negate = Negate ^ NegatedBallot;
2941 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2942 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2943 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2962 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2964 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
2972 CurDAG->SelectNodeTo(
N, BrOp, MVT::Other,
2977void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2978 if (Subtarget->hasSALUFloatInsts() &&
N->getValueType(0) == MVT::f32 &&
2979 !
N->isDivergent()) {
2981 if (Src.getValueType() == MVT::f16) {
2983 CurDAG->SelectNodeTo(
N, AMDGPU::S_CVT_HI_F32_F16,
N->getVTList(),
2993void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2996 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2997 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
3002 MachineMemOperand *MMO =
M->getMemOperand();
3006 if (
CurDAG->isBaseWithConstantOffset(Ptr)) {
3011 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
3012 N = glueCopyToM0(
N, PtrBase);
3013 Offset =
CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
3018 N = glueCopyToM0(
N, Ptr);
3019 Offset =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
3024 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
3029 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3035void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N,
unsigned IntrID) {
3038 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3039 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3040 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
3042 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3043 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
3045 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3046 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
3049 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
3050 N->getOperand(5),
N->getOperand(0)};
3053 MachineMemOperand *MMO =
M->getMemOperand();
3054 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3058void AMDGPUDAGToDAGISel::SelectTensorLoadStore(
SDNode *
N,
unsigned IntrID) {
3059 bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
3061 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3073 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3074 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3086 (void)
CurDAG->SelectNodeTo(
N,
Opc, MVT::Other, TensorOps);
3091 case Intrinsic::amdgcn_ds_gws_init:
3092 return AMDGPU::DS_GWS_INIT;
3093 case Intrinsic::amdgcn_ds_gws_barrier:
3094 return AMDGPU::DS_GWS_BARRIER;
3095 case Intrinsic::amdgcn_ds_gws_sema_v:
3096 return AMDGPU::DS_GWS_SEMA_V;
3097 case Intrinsic::amdgcn_ds_gws_sema_br:
3098 return AMDGPU::DS_GWS_SEMA_BR;
3099 case Intrinsic::amdgcn_ds_gws_sema_p:
3100 return AMDGPU::DS_GWS_SEMA_P;
3101 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3102 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3108void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
3109 if (!Subtarget->hasGWS() ||
3110 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3111 !Subtarget->hasGWSSemaReleaseAll())) {
3118 const bool HasVSrc =
N->getNumOperands() == 4;
3119 assert(HasVSrc ||
N->getNumOperands() == 3);
3122 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
3125 MachineMemOperand *MMO =
M->getMemOperand();
3138 glueCopyToM0(
N,
CurDAG->getTargetConstant(0, SL, MVT::i32));
3139 ImmOffset = ConstOffset->getZExtValue();
3141 if (
CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3150 =
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3154 =
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3156 CurDAG->getTargetConstant(16, SL, MVT::i32));
3157 glueCopyToM0(
N,
SDValue(M0Base, 0));
3161 SDValue OffsetField =
CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3165 const MCInstrDesc &InstrDesc =
TII->get(
Opc);
3166 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
3168 const TargetRegisterClass *DataRC =
TII->getRegClass(InstrDesc, Data0Idx);
3172 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3175 MVT DataVT =
Data.getValueType().getSimpleVT();
3176 if (
TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3178 Ops.push_back(
N->getOperand(2));
3184 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3186 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3188 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3191 SL, MVT::v2i32, RegSeqOps),
3196 Ops.push_back(OffsetField);
3197 Ops.push_back(Chain);
3199 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3203void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
3204 if (Subtarget->getLDSBankCount() != 16) {
3234 SDVTList VTs =
CurDAG->getVTList(MVT::f32, MVT::Other);
3237 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32,
DL, VTs, {
3238 CurDAG->getTargetConstant(2,
DL, MVT::i32),
3244 SDNode *InterpP1LV =
3245 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16,
DL, MVT::f32, {
3246 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3250 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3253 CurDAG->getTargetConstant(0,
DL, MVT::i1),
3254 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3261void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
3262 unsigned IntrID =
N->getConstantOperandVal(1);
3264 case Intrinsic::amdgcn_ds_append:
3265 case Intrinsic::amdgcn_ds_consume: {
3266 if (
N->getValueType(0) != MVT::i32)
3268 SelectDSAppendConsume(
N, IntrID);
3271 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3272 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3273 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3274 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3275 SelectDSBvhStackIntrinsic(
N, IntrID);
3277 case Intrinsic::amdgcn_init_whole_wave:
3278 CurDAG->getMachineFunction()
3279 .getInfo<SIMachineFunctionInfo>()
3280 ->setInitWholeWave();
3287void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
3288 unsigned IntrID =
N->getConstantOperandVal(0);
3289 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3290 SDNode *ConvGlueNode =
N->getGluedNode();
3296 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3297 MVT::Glue,
SDValue(ConvGlueNode, 0));
3299 ConvGlueNode =
nullptr;
3302 case Intrinsic::amdgcn_wqm:
3303 Opcode = AMDGPU::WQM;
3305 case Intrinsic::amdgcn_softwqm:
3306 Opcode = AMDGPU::SOFT_WQM;
3308 case Intrinsic::amdgcn_wwm:
3309 case Intrinsic::amdgcn_strict_wwm:
3310 Opcode = AMDGPU::STRICT_WWM;
3312 case Intrinsic::amdgcn_strict_wqm:
3313 Opcode = AMDGPU::STRICT_WQM;
3315 case Intrinsic::amdgcn_interp_p1_f16:
3316 SelectInterpP1F16(
N);
3318 case Intrinsic::amdgcn_permlane16_swap:
3319 case Intrinsic::amdgcn_permlane32_swap: {
3320 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3321 !Subtarget->hasPermlane16Swap()) ||
3322 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3323 !Subtarget->hasPermlane32Swap())) {
3328 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3329 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3330 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3334 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3336 bool FI =
N->getConstantOperandVal(3);
3337 NewOps[2] =
CurDAG->getTargetConstant(
3340 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), NewOps);
3348 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3350 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), {Src});
3355 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3356 CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(), NewOps);
3360void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
3361 unsigned IntrID =
N->getConstantOperandVal(1);
3363 case Intrinsic::amdgcn_ds_gws_init:
3364 case Intrinsic::amdgcn_ds_gws_barrier:
3365 case Intrinsic::amdgcn_ds_gws_sema_v:
3366 case Intrinsic::amdgcn_ds_gws_sema_br:
3367 case Intrinsic::amdgcn_ds_gws_sema_p:
3368 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3369 SelectDS_GWS(
N, IntrID);
3371 case Intrinsic::amdgcn_tensor_load_to_lds:
3372 case Intrinsic::amdgcn_tensor_store_from_lds:
3373 SelectTensorLoadStore(
N, IntrID);
3382void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
3384 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(
N), MVT::i32);
3385 CurDAG->SelectNodeTo(
N, AMDGPU::S_LSHR_B32,
N->getVTList(),
3386 {N->getOperand(0), Log2WaveSize});
3389void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
3404 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
3406 if (
N->isDivergent()) {
3407 SrcVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
3412 CopyVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3413 {SrcVal, Log2WaveSize}),
3417 SDValue CopyToSP =
CurDAG->getCopyToReg(
N->getOperand(0), SL,
SP, CopyVal);
3421bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
3423 bool IsCanonicalizing,
3424 bool AllowAbs)
const {
3430 Src = Src.getOperand(0);
3431 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
3435 if (
LHS &&
LHS->isZero()) {
3437 Src = Src.getOperand(1);
3441 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
3443 Src = Src.getOperand(0);
3456 if (IsCanonicalizing)
3471 EVT VT = Src.getValueType();
3473 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3480 auto ReplaceSrc = [&]() ->
SDValue {
3482 return Src.getOperand(0);
3487 Src.getValueType(),
LHS, Index);
3513 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
3515 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3522bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
3525 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
3527 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3534bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
3537 if (SelectVOP3ModsImpl(In, Src, Mods,
3540 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3547bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
3555bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
3559 if (SelectVOP3ModsImpl(In, Src, Mods,
3564 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3571bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
3573 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
3576bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
3578 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
3581bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
3585 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3586 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3588 return SelectVOP3Mods(In, Src, SrcMods);
3591bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
3595 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3596 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3598 return SelectVOP3BMods(In, Src, SrcMods);
3601bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
3606 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3607 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3612bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
3613 SDValue &SrcMods,
bool IsDOT)
const {
3620 Src = Src.getOperand(0);
3624 bool HasOpSel = Src.getValueSizeInBits() != 128;
3627 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3628 unsigned VecMods = Mods;
3630 SDValue Lo = stripBitcast(Src.getOperand(0));
3631 SDValue Hi = stripBitcast(Src.getOperand(1));
3634 Lo = stripBitcast(
Lo.getOperand(0));
3639 Hi = stripBitcast(
Hi.getOperand(0));
3651 unsigned VecSize = Src.getValueSizeInBits();
3652 Lo = stripExtractLoElt(
Lo);
3653 Hi = stripExtractLoElt(
Hi);
3655 if (
Lo.getValueSizeInBits() > VecSize) {
3656 Lo =
CurDAG->getTargetExtractSubreg(
3657 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3661 if (
Hi.getValueSizeInBits() > VecSize) {
3662 Hi =
CurDAG->getTargetExtractSubreg(
3663 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3667 assert(
Lo.getValueSizeInBits() <= VecSize &&
3668 Hi.getValueSizeInBits() <= VecSize);
3670 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
3674 if (VecSize ==
Lo.getValueSizeInBits()) {
3676 }
else if (VecSize == 32) {
3677 Src = createVOP3PSrc32FromLo16(
Lo, Src,
CurDAG, Subtarget);
3679 assert((
Lo.getValueSizeInBits() == 32 && VecSize == 64) ||
3680 (
Lo.getValueSizeInBits() == 64 && VecSize == 128));
3684 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3685 Lo.getValueType()), 0);
3686 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3691 auto RC = (
Lo->isDivergent() || !HasOpSel)
3692 ?
TRI->getVGPRClassForBitWidth(VecSize)
3693 :
TRI->getSGPRClassForBitWidth(VecSize);
3694 unsigned NumRegs =
Lo.getValueSizeInBits() == 32 ? 1 : 2;
3696 CurDAG->getTargetConstant(RC->getID(), SL, MVT::i32),
Lo,
3697 CurDAG->getTargetConstant(
TRI->getSubRegFromChannel(0, NumRegs), SL,
3700 CurDAG->getTargetConstant(
3701 TRI->getSubRegFromChannel(NumRegs, NumRegs), SL, MVT::i32)};
3703 Src =
SDValue(
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3704 Src.getValueType(),
Ops), 0);
3706 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3712 .bitcastToAPInt().getZExtValue();
3714 Src =
CurDAG->getTargetConstant(
Lit, SDLoc(In), MVT::i64);
3715 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3722 Src.getNumOperands() == 2) {
3727 assert(Src.getValueSizeInBits() != 128 &&
3728 "<2 x 64> VECTOR_SHUFFLE should not be legal.");
3731 ArrayRef<int>
Mask = SVN->getMask();
3733 if (Mask[0] < 2 && Mask[1] < 2) {
3735 SDValue ShuffleSrc = SVN->getOperand(0);
3748 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3756 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3760bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3762 return SelectVOP3PMods(In, Src, SrcMods,
true);
3765bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(
SDValue In,
SDValue &Src)
const {
3767 SelectVOP3PMods(In, SrcTmp, SrcModsTmp,
true);
3776bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(
SDValue In,
SDValue &Src,
3778 SelectVOP3Mods(In, Src, SrcMods);
3781 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3785bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(
SDValue In,
SDValue &Src)
const {
3787 SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
3796bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3799 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3802 unsigned SrcVal =
C->getZExtValue();
3806 Src =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3813 unsigned DstRegClass;
3815 switch (Elts.
size()) {
3817 DstRegClass = AMDGPU::VReg_256RegClassID;
3821 DstRegClass = AMDGPU::VReg_128RegClassID;
3825 DstRegClass = AMDGPU::VReg_64RegClassID;
3833 Ops.push_back(
CurDAG->getTargetConstant(DstRegClass,
DL, MVT::i32));
3834 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3835 Ops.push_back(Elts[i]);
3836 Ops.push_back(
CurDAG->getTargetConstant(
3839 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, DstTy,
Ops);
3846 assert(
"unhandled Reg sequence size" &&
3847 (Elts.
size() == 8 || Elts.
size() == 16));
3851 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3852 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3857 if (Subtarget->useRealTrue16Insts()) {
3862 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, MVT::i16),
3865 emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID, MVT::i32,
3866 {Elts[i],
Undef}, {AMDGPU::lo16, AMDGPU::hi16},
DL);
3867 Elts[i + 1] = emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID,
3868 MVT::i32, {Elts[i + 1],
Undef},
3869 {AMDGPU::lo16, AMDGPU::hi16},
DL);
3871 SDValue PackLoLo =
CurDAG->getTargetConstant(0x05040100,
DL, MVT::i32);
3873 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64,
DL, MVT::i32,
3874 {Elts[i + 1], Elts[i], PackLoLo});
3878 return buildRegSequence32(PackedElts,
DL);
3884 unsigned ElementSize)
const {
3885 if (ElementSize == 16)
3886 return buildRegSequence16(Elts,
DL);
3887 if (ElementSize == 32)
3888 return buildRegSequence32(Elts,
DL);
3892void AMDGPUDAGToDAGISel::selectWMMAModsNegAbs(
unsigned ModOpcode,
3896 unsigned ElementSize)
const {
3901 for (
auto El : Elts) {
3904 NegAbsElts.
push_back(El->getOperand(0));
3906 if (Elts.size() != NegAbsElts.
size()) {
3908 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3912 Src =
SDValue(buildRegSequence(NegAbsElts,
DL, ElementSize), 0);
3918 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3926 std::function<
bool(
SDValue)> ModifierCheck) {
3930 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3931 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3932 if (!ModifierCheck(ElF16))
3939bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3957 Src =
SDValue(buildRegSequence16(EltsF16, SDLoc(In)), 0);
3976 Src =
SDValue(buildRegSequence32(EltsV2F16, SDLoc(In)), 0);
3982 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3986bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3997 if (EltsF16.
empty())
4007 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, SDLoc(In), 16);
4017 if (EltsV2F16.
empty())
4026 selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, SDLoc(In), 32);
4029 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4033bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
4043 unsigned ModOpcode =
4054 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, SDLoc(In), 32);
4057 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4061bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
4063 BitVector UndefElements;
4065 if (isInlineImmediate(
Splat.getNode())) {
4067 unsigned Imm =
C->getAPIntValue().getSExtValue();
4068 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4072 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
4073 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4081 SDValue SplatSrc32 = stripBitcast(In);
4083 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
4084 SDValue SplatSrc16 = stripBitcast(Splat32);
4087 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
4088 std::optional<APInt> RawValue;
4090 RawValue =
C->getValueAPF().bitcastToAPInt();
4092 RawValue =
C->getAPIntValue();
4094 if (RawValue.has_value()) {
4095 EVT VT =
In.getValueType().getScalarType();
4101 if (
TII->isInlineConstant(FloatVal)) {
4102 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4107 if (
TII->isInlineConstant(RawValue.value())) {
4108 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4121 if (
CurDAG->isConstantIntBuildVectorOrConstantInt(SplatSrc32)) {
4126 int64_t LoImm = Lo32->getAPIntValue().getSExtValue();
4127 int64_t HiImm = Hi32->getAPIntValue().getSExtValue();
4128 int64_t Imm64I = (HiImm << 32) + LoImm;
4130 if (!isInlineImmediate(APInt(64, Imm64I)))
4133 }
else if (Imm64I != Imm64)
4137 Src =
CurDAG->getTargetConstant(Imm64, SDLoc(In), MVT::i64);
4144bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
4150 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4159 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4163bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
4169 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4178 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4182bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(
SDValue In,
SDValue &Src,
4190 const SDValue &ExtendSrc =
In.getOperand(0);
4194 const SDValue &CastSrc =
In.getOperand(0);
4198 if (Zero &&
Zero->getZExtValue() == 0)
4209 Src = ExtractVecEltSrc;
4213 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4217bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
4221 SrcMods =
CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4225bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
4228 return SelectVOP3Mods(In, Src, SrcMods);
4240 Op =
Op.getOperand(0);
4242 IsExtractHigh =
false;
4245 if (!Low16 || !Low16->isZero())
4247 Op = stripBitcast(
Op.getOperand(1));
4248 if (
Op.getValueType() != MVT::bf16)
4253 if (
Op.getValueType() != MVT::i32)
4258 if (Mask->getZExtValue() == 0xffff0000) {
4259 IsExtractHigh =
true;
4260 return Op.getOperand(0);
4269 return Op.getOperand(0);
4278bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
4282 SelectVOP3ModsImpl(In, Src, Mods);
4284 bool IsExtractHigh =
false;
4286 Src = Src.getOperand(0);
4287 }
else if (VT == MVT::bf16) {
4295 if (Src.getValueType() != VT &&
4296 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
4299 Src = stripBitcast(Src);
4305 SelectVOP3ModsImpl(Src, Src, ModsTmp);
4320 if (Src.getValueSizeInBits() == 16) {
4329 Src.getOperand(0).getValueType() == MVT::i32) {
4330 Src = Src.getOperand(0);
4334 if (Subtarget->useRealTrue16Insts())
4336 Src = createVOP3PSrc32FromLo16(Src, In,
CurDAG, Subtarget);
4337 }
else if (IsExtractHigh)
4343bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
4346 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4348 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4352bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
4355 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4356 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4360bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(
SDValue In,
SDValue &Src,
4363 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4365 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4369bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(
SDValue In,
SDValue &Src,
4372 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4373 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4381 unsigned NumOpcodes = 0;
4394 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4397 if (
C->isAllOnes()) {
4407 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4421 if (Src.size() == 3) {
4427 if (
C->isAllOnes()) {
4429 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4430 if (Src[
I] ==
LHS) {
4442 Bits = SrcBits[Src.size()];
4447 switch (In.getOpcode()) {
4455 if (!getOperandBits(
LHS, LHSBits) ||
4456 !getOperandBits(
RHS, RHSBits)) {
4457 Src = std::move(Backup);
4458 return std::make_pair(0, 0);
4479 uint8_t LHSBitsOrig = LHSBits;
4480 uint8_t RHSBitsOrig = RHSBits;
4484 NumOpcodes += LHSOp.first;
4485 LHSBits = LHSOp.second;
4492 NumOpcodes += RHSOp.first;
4493 RHSBits = RHSOp.second;
4497 auto dependsOnSlot = [](
uint8_t TT,
int Slot) ->
bool {
4498 if (Slot < 0 || Slot > 2)
4500 const uint8_t Masks[3] = {0x0f, 0x33, 0x55};
4501 const int Shifts[3] = {4, 2, 1};
4502 return ((TT ^ (TT >> Shifts[Slot])) & Masks[Slot]) != 0;
4508 const uint8_t SrcBitsConst[3] = {0xf0, 0xcc, 0xaa};
4515 NegatedInner =
Op.getOperand(0);
4516 for (
int I = 0;
I < (int)S.size();
I++) {
4517 if (Bits == SrcBitsConst[
I] && S[
I] ==
Op)
4519 if (IsNegationOp && Bits == (
uint8_t)~SrcBitsConst[
I] &&
4520 S[
I] == NegatedInner)
4531 for (
int I = 0;
I < (int)SrcAfterLHS.
size() &&
I < 3;
I++) {
4532 if (
I < (
int)Src.size() && Src[
I] != SrcAfterLHS[
I] &&
4533 dependsOnSlot(LHSBits,
I)) {
4542 if (!Stale && !RHSOp.first) {
4543 int Slot = findSlot(RHSBitsOrig,
RHS, SrcBeforeRecurse);
4545 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4551 if (!Stale && !LHSOp.first) {
4552 int Slot = findSlot(LHSBitsOrig,
LHS, SrcBeforeRecurse);
4554 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4559 Src = std::move(SrcBeforeRecurse);
4560 LHSBits = LHSBitsOrig;
4561 RHSBits = RHSBitsOrig;
4567 return std::make_pair(0, 0);
4571 switch (In.getOpcode()) {
4573 TTbl = LHSBits & RHSBits;
4576 TTbl = LHSBits | RHSBits;
4579 TTbl = LHSBits ^ RHSBits;
4585 return std::make_pair(NumOpcodes + 1, TTbl);
4592 unsigned NumOpcodes;
4594 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(In, Src);
4598 if (NumOpcodes < 2 || Src.empty())
4604 if (NumOpcodes < 4 && !In->isDivergent())
4607 if (NumOpcodes == 2 &&
In.getValueType() == MVT::i32) {
4612 (
In.getOperand(0).getOpcode() ==
In.getOpcode() ||
4613 In.getOperand(1).getOpcode() ==
In.getOpcode()))
4627 while (Src.size() < 3)
4628 Src.push_back(Src[0]);
4634 Tbl =
CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4640 return CurDAG->getUNDEF(MVT::i32);
4644 return CurDAG->getConstant(
C->getZExtValue() << 16, SL, MVT::i32);
4649 return CurDAG->getConstant(
4650 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
4660bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
4661 assert(
CurDAG->getTarget().getTargetTriple().isAMDGCN());
4663 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
4664 const SIInstrInfo *SII = Subtarget->getInstrInfo();
4667 bool AllUsesAcceptSReg =
true;
4669 Limit < 10 && U !=
E; ++U, ++Limit) {
4670 const TargetRegisterClass *RC =
4671 getOperandRegClass(
U->getUser(),
U->getOperandNo());
4679 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
4680 RC != &AMDGPU::VS_64_Align2RegClass) {
4681 AllUsesAcceptSReg =
false;
4682 SDNode *
User =
U->getUser();
4683 if (
User->isMachineOpcode()) {
4684 unsigned Opc =
User->getMachineOpcode();
4685 const MCInstrDesc &
Desc = SII->get(
Opc);
4686 if (
Desc.isCommutable()) {
4687 unsigned OpIdx =
Desc.getNumDefs() +
U->getOperandNo();
4690 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
4691 const TargetRegisterClass *CommutedRC =
4692 getOperandRegClass(
U->getUser(), CommutedOpNo);
4693 if (CommutedRC == &AMDGPU::VS_32RegClass ||
4694 CommutedRC == &AMDGPU::VS_64RegClass ||
4695 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
4696 AllUsesAcceptSReg =
true;
4704 if (!AllUsesAcceptSReg)
4708 return !AllUsesAcceptSReg && (Limit < 10);
4711bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
4713 const MachineMemOperand *MMO = Ld->getMemOperand();
4731 (Subtarget->getScalarizeGlobalBehavior() &&
4735 ->isMemOpHasNoClobberedMemOperand(
N)));
4741 bool IsModified =
false;
4747 while (Position !=
CurDAG->allnodes_end()) {
4754 if (ResNode !=
Node) {
4760 CurDAG->RemoveDeadNodes();
4761 }
while (IsModified);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
static MemSDNode * findMemSDNode(SDNode *N)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
bool isSDWAOperand(const SDNode *N) const
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void SelectVectorShuffle(SDNode *N)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static SDValue stripBitcast(SDValue Val)
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ CONVERGENCECTRL_GLUE
This does not correspond to any convergence control intrinsic.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG, ready for instruction scheduling.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.