29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(),
Ops),
118 SDValue Idx = In.getOperand(1);
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
135 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
139#ifdef EXPENSIVE_CHECKS
144 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
165bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
199 case AMDGPUISD::FRACT:
200 case AMDGPUISD::CLAMP:
201 case AMDGPUISD::COS_HW:
202 case AMDGPUISD::SIN_HW:
203 case AMDGPUISD::FMIN3:
204 case AMDGPUISD::FMAX3:
205 case AMDGPUISD::FMED3:
206 case AMDGPUISD::FMAD_FTZ:
209 case AMDGPUISD::RCP_IFLAG:
219 case AMDGPUISD::DIV_FIXUP:
229#ifdef EXPENSIVE_CHECKS
233 assert(L->isLCSSAForm(DT));
241#ifdef EXPENSIVE_CHECKS
249 assert(Subtarget->d16PreservesUnusedBits());
250 MVT VT =
N->getValueType(0).getSimpleVT();
251 if (VT != MVT::v2i16 && VT != MVT::v2f16)
273 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
276 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
282 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
295 if (LdLo &&
Lo.hasOneUse()) {
301 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
304 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
316 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
329 if (!Subtarget->d16PreservesUnusedBits())
334 bool MadeChange =
false;
335 while (Position !=
CurDAG->allnodes_begin()) {
340 switch (
N->getOpcode()) {
351 CurDAG->RemoveDeadNodes();
357bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
363 return TII->isInlineConstant(
C->getAPIntValue());
366 return TII->isInlineConstant(
C->getValueAPF());
376 unsigned OpNo)
const {
377 if (!
N->isMachineOpcode()) {
380 if (
Reg.isVirtual()) {
382 return MRI.getRegClass(
Reg);
385 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
386 return TRI->getPhysRegBaseClass(
Reg);
392 switch (
N->getMachineOpcode()) {
394 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
395 const MCInstrDesc &
Desc =
TII->get(
N->getMachineOpcode());
396 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
400 int16_t RegClass =
TII->getOpRegClassID(
Desc.operands()[
OpIdx]);
404 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
406 case AMDGPU::REG_SEQUENCE: {
407 unsigned RCID =
N->getConstantOperandVal(0);
408 const TargetRegisterClass *SuperRC =
409 Subtarget->getRegisterInfo()->getRegClass(RCID);
411 SDValue SubRegOp =
N->getOperand(OpNo + 1);
413 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
422 Ops.push_back(NewChain);
423 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
424 Ops.push_back(
N->getOperand(i));
427 return CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(),
Ops);
434 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
437 return glueCopyToOp(
N,
M0,
M0.getValue(1));
440SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
443 if (Subtarget->ldsRequiresM0Init())
445 N,
CurDAG->getSignedTargetConstant(-1, SDLoc(
N), MVT::i32));
447 MachineFunction &
MF =
CurDAG->getMachineFunction();
448 unsigned Value =
MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
450 glueCopyToM0(
N,
CurDAG->getTargetConstant(
Value, SDLoc(
N), MVT::i32));
457 SDNode *
Lo =
CurDAG->getMachineNode(
458 AMDGPU::S_MOV_B32,
DL, MVT::i32,
460 SDNode *
Hi =
CurDAG->getMachineNode(
461 AMDGPU::S_MOV_B32,
DL, MVT::i32,
464 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
468 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, VT,
Ops);
471SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(
const SDNode *
N,
476 uint32_t LHSVal, RHSVal;
480 uint32_t
K = (LHSVal & 0xffff) | (RHSVal << 16);
482 isVGPRImm(
N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
490 EVT VT =
N->getValueType(0);
494 SDValue RegClass =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
496 if (NumVectorElts == 1) {
497 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0),
502 bool IsGCN =
CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
503 if (IsGCN && Subtarget->has64BitLiterals() && VT.
getSizeInBits() == 64 &&
506 bool AllConst =
true;
508 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
516 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
519 C |= Val << (EltSize *
I);
524 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
DL, VT, CV);
525 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, VT,
SDValue(Copy, 0),
531 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
538 RegSeqArgs[0] =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
539 bool IsRegSeq =
true;
540 unsigned NOps =
N->getNumOperands();
541 for (
unsigned i = 0; i < NOps; i++) {
549 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
550 RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(
Sub,
DL, MVT::i32);
552 if (NOps != NumVectorElts) {
557 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
560 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
561 RegSeqArgs[1 + (2 * i) + 1] =
568 CurDAG->SelectNodeTo(
N, AMDGPU::REG_SEQUENCE,
N->getVTList(), RegSeqArgs);
572 EVT VT =
N->getValueType(0);
576 if (!Subtarget->hasPkMovB32() || !EltVT.
bitsEq(MVT::i32) ||
590 Mask[0] < 4 && Mask[1] < 4);
592 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
593 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
594 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
595 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
598 Src0SubReg = Src1SubReg;
600 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
605 Src1SubReg = Src0SubReg;
607 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
617 if (
N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
618 Src1SubReg == AMDGPU::sub0) {
634 SDValue Src0OpSelVal =
CurDAG->getTargetConstant(Src0OpSel,
DL, MVT::i32);
635 SDValue Src1OpSelVal =
CurDAG->getTargetConstant(Src1OpSel,
DL, MVT::i32);
638 CurDAG->SelectNodeTo(
N, AMDGPU::V_PK_MOV_B32,
N->getVTList(),
639 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
649 CurDAG->getTargetExtractSubreg(Src0SubReg,
DL, EltVT, VSrc0);
651 CurDAG->getTargetExtractSubreg(Src1SubReg,
DL, EltVT, VSrc1);
654 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
655 ResultElt0,
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32),
656 ResultElt1,
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32)};
657 CurDAG->SelectNodeTo(
N, TargetOpcode::REG_SEQUENCE, VT,
Ops);
661 unsigned int Opc =
N->getOpcode();
662 if (
N->isMachineOpcode()) {
670 N = glueCopyToM0LDSInit(
N);
685 if (
N->getValueType(0) != MVT::i64)
688 SelectADD_SUB_I64(
N);
693 if (
N->getValueType(0) != MVT::i32)
700 SelectUADDO_USUBO(
N);
703 case AMDGPUISD::FMUL_W_CHAIN: {
704 SelectFMUL_W_CHAIN(
N);
707 case AMDGPUISD::FMA_W_CHAIN: {
708 SelectFMA_W_CHAIN(
N);
714 EVT VT =
N->getValueType(0);
731 ?
TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
743 if (
N->getValueType(0) == MVT::i128) {
744 RC =
CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID,
DL, MVT::i32);
745 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0_sub1,
DL, MVT::i32);
746 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub2_sub3,
DL, MVT::i32);
747 }
else if (
N->getValueType(0) == MVT::i64) {
748 RC =
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32);
749 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
750 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
754 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
755 N->getOperand(1), SubReg1 };
757 N->getValueType(0),
Ops));
763 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N) ||
764 Subtarget->has64BitLiterals())
769 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
774 Imm =
C->getZExtValue();
783 case AMDGPUISD::BFE_I32:
784 case AMDGPUISD::BFE_U32: {
810 case AMDGPUISD::DIV_SCALE: {
821 return SelectMUL_LOHI(
N);
832 if (
N->getValueType(0) != MVT::i32)
843 case AMDGPUISD::CVT_PKRTZ_F16_F32:
844 case AMDGPUISD::CVT_PKNORM_I16_F32:
845 case AMDGPUISD::CVT_PKNORM_U16_F32:
846 case AMDGPUISD::CVT_PK_U16_U32:
847 case AMDGPUISD::CVT_PK_I16_I32: {
849 if (
N->getValueType(0) == MVT::i32) {
850 MVT NewVT =
Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
852 { N->getOperand(0), N->getOperand(1) });
860 SelectINTRINSIC_W_CHAIN(
N);
864 SelectINTRINSIC_WO_CHAIN(
N);
868 SelectINTRINSIC_VOID(
N);
872 SelectWAVE_ADDRESS(
N);
876 SelectSTACKRESTORE(
N);
884bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
887 return Term->getMetadata(
"amdgpu.uniform") ||
888 Term->getMetadata(
"structurizecfg.uniform");
891bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
892 unsigned ShAmtBits)
const {
895 const APInt &
RHS =
N->getConstantOperandAPInt(1);
896 if (
RHS.countr_one() >= ShAmtBits)
926 N1 =
Lo.getOperand(1);
936 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
951 return "AMDGPU DAG->DAG Pattern Instruction Selection";
961#ifdef EXPENSIVE_CHECKS
967 for (
auto &L : LI.getLoopsInPreorder())
968 assert(L->isLCSSAForm(DT) &&
"Loop is not in LCSSA form!");
990 }
else if ((Addr.
getOpcode() == AMDGPUISD::DWORDADDR) &&
992 Base =
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1006SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1008 SDNode *Mov =
CurDAG->getMachineNode(
1009 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1010 CurDAG->getTargetConstant(Val,
DL, MVT::i32));
1015void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(
SDNode *
N) {
1020 unsigned Opcode =
N->getOpcode();
1029 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1030 DL, MVT::i32,
LHS, Sub0);
1031 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1032 DL, MVT::i32,
LHS, Sub1);
1034 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1035 DL, MVT::i32,
RHS, Sub0);
1036 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1037 DL, MVT::i32,
RHS, Sub1);
1039 SDVTList VTList =
CurDAG->getVTList(MVT::i32, MVT::Glue);
1041 static const unsigned OpcMap[2][2][2] = {
1042 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1043 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1044 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1045 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1047 unsigned Opc = OpcMap[0][
N->isDivergent()][IsAdd];
1048 unsigned CarryOpc = OpcMap[1][
N->isDivergent()][IsAdd];
1051 if (!ConsumeCarry) {
1053 AddLo =
CurDAG->getMachineNode(
Opc,
DL, VTList, Args);
1056 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1063 SDNode *AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, AddHiArgs);
1066 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
1073 MVT::i64, RegSequenceArgs);
1084void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
1089 if (
N->isDivergent()) {
1091 : AMDGPU::V_SUBB_U32_e64;
1093 N,
Opc,
N->getVTList(),
1095 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1098 : AMDGPU::S_SUB_CO_PSEUDO;
1099 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(), {LHS, RHS, CI});
1103void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
1108 bool IsVALU =
N->isDivergent();
1110 for (SDNode::user_iterator UI =
N->user_begin(),
E =
N->user_end(); UI !=
E;
1112 if (UI.getUse().getResNo() == 1) {
1113 if (UI->isMachineOpcode()) {
1114 if (UI->getMachineOpcode() !=
1115 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1128 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1131 N,
Opc,
N->getVTList(),
1132 {N->getOperand(0), N->getOperand(1),
1133 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1135 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1137 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
1138 {N->getOperand(0), N->getOperand(1)});
1142void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
1146 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1147 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1148 SelectVOP3Mods(
N->getOperand(3),
Ops[5],
Ops[4]);
1149 Ops[8] =
N->getOperand(0);
1150 Ops[9] =
N->getOperand(4);
1154 bool UseFMAC = Subtarget->hasDLInsts() &&
1158 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1159 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(),
Ops);
1162void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
1166 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[4],
Ops[5]);
1167 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1168 Ops[6] =
N->getOperand(0);
1169 Ops[7] =
N->getOperand(3);
1171 CurDAG->SelectNodeTo(
N, AMDGPU::V_MUL_F32_e64,
N->getVTList(),
Ops);
1176void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
1177 EVT VT =
N->getValueType(0);
1179 assert(VT == MVT::f32 || VT == MVT::f64);
1182 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1187 SelectVOP3BMods0(
N->getOperand(0),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1188 SelectVOP3BMods(
N->getOperand(1),
Ops[3],
Ops[2]);
1189 SelectVOP3BMods(
N->getOperand(2),
Ops[5],
Ops[4]);
1195void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1199 bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && !
N->hasAnyUseOfValue(1);
1200 if (Subtarget->hasMADIntraFwdBug())
1201 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1202 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1203 else if (UseNoCarry)
1204 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1206 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1209 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1213 MachineSDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, MVT::i64,
Ops);
1224void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1229 if (Subtarget->hasMadU64U32NoCarry()) {
1230 VTList =
CurDAG->getVTList(MVT::i64);
1231 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1233 VTList =
CurDAG->getVTList(MVT::i64, MVT::i1);
1234 if (Subtarget->hasMADIntraFwdBug()) {
1235 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1236 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1238 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1245 SDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, VTList,
Ops);
1247 SDValue Sub0 =
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1248 SDNode *
Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1249 MVT::i32,
SDValue(Mad, 0), Sub0);
1253 SDValue Sub1 =
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1254 SDNode *
Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1255 MVT::i32,
SDValue(Mad, 0), Sub1);
1265 if (!
Base || Subtarget->hasUsableDSOffset() ||
1266 Subtarget->unsafeDSOffsetFoldingEnabled())
1277 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1290 int64_t ByteOffset =
C->getSExtValue();
1291 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1300 if (isDSOffsetLegal(
Sub, ByteOffset)) {
1306 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1307 if (Subtarget->hasAddNoCarryInsts()) {
1308 SubOp = AMDGPU::V_SUB_U32_e64;
1310 CurDAG->getTargetConstant(0, {}, MVT::i1));
1313 MachineSDNode *MachineSub =
1314 CurDAG->getMachineNode(SubOp,
DL, MVT::i32, Opnds);
1330 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1332 MachineSDNode *MovZero =
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1333 DL, MVT::i32, Zero);
1335 Offset =
CurDAG->getTargetConstant(CAddr->getZExtValue(),
DL, MVT::i16);
1342 Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1346bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1348 unsigned Size)
const {
1349 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1354 if (!
Base || Subtarget->hasUsableDSOffset() ||
1355 Subtarget->unsafeDSOffsetFoldingEnabled())
1373bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1379 if (Subtarget->hasSignedScratchOffsets())
1389 ConstantSDNode *ImmOp =
nullptr;
1400bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1406 if (Subtarget->hasSignedScratchOffsets())
1416bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1430 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1433 auto LHS =
Base.getOperand(0);
1434 auto RHS =
Base.getOperand(1);
1442 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 4);
1448 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 8);
1453 unsigned Size)
const {
1456 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1461 unsigned OffsetValue1 = OffsetValue0 +
Size;
1464 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1466 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1467 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1472 if (
const ConstantSDNode *
C =
1474 unsigned OffsetValue0 =
C->getZExtValue();
1475 unsigned OffsetValue1 = OffsetValue0 +
Size;
1477 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1487 if (isDSOffset2Legal(
Sub, OffsetValue0, OffsetValue1,
Size)) {
1491 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1492 if (Subtarget->hasAddNoCarryInsts()) {
1493 SubOp = AMDGPU::V_SUB_U32_e64;
1495 CurDAG->getTargetConstant(0, {}, MVT::i1));
1498 MachineSDNode *MachineSub =
CurDAG->getMachineNode(
1503 CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1505 CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1511 unsigned OffsetValue0 = CAddr->getZExtValue();
1512 unsigned OffsetValue1 = OffsetValue0 +
Size;
1514 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1516 MachineSDNode *MovZero =
1517 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
1519 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1520 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1528 Offset0 =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1529 Offset1 =
CurDAG->getTargetConstant(1,
DL, MVT::i32);
1539 if (Subtarget->useFlatForGlobal())
1544 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1545 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1546 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1547 SOffset = Subtarget->hasRestrictedSOffset()
1548 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1549 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1551 ConstantSDNode *C1 =
nullptr;
1553 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1566 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1572 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1588 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1590 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1594 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1604 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1615 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1621bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1624 SDValue Ptr, Offen, Idxen, Addr64;
1628 if (!Subtarget->hasAddr64())
1631 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1635 if (
C->getSExtValue()) {
1648std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1653 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1659 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
1662bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1668 MachineFunction &
MF =
CurDAG->getMachineFunction();
1669 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1671 Rsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1674 int64_t
Imm = CAddr->getSExtValue();
1675 const int64_t NullPtr =
1678 if (Imm != NullPtr) {
1681 CurDAG->getTargetConstant(Imm & ~MaxOffset,
DL, MVT::i32);
1682 MachineSDNode *MovHighBits =
CurDAG->getMachineNode(
1683 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1684 VAddr =
SDValue(MovHighBits, 0);
1686 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1687 ImmOffset =
CurDAG->getTargetConstant(Imm & MaxOffset,
DL, MVT::i32);
1692 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1713 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1714 if (
TII->isLegalMUBUFImmOffset(C1) &&
1715 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1716 CurDAG->SignBitIsZero(N0))) {
1717 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1718 ImmOffset =
CurDAG->getTargetConstant(C1,
DL, MVT::i32);
1724 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1725 ImmOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1733 if (!
Reg.isPhysical())
1735 const auto *RC =
TRI.getPhysRegBaseClass(
Reg);
1736 return RC &&
TRI.isSGPRClass(RC);
1739bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1744 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
1745 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1746 MachineFunction &
MF =
CurDAG->getMachineFunction();
1747 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1752 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1758 ConstantSDNode *CAddr;
1771 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1776 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1782bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(
SDValue Addr,
SDValue &SRsrc,
1785 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1786 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1788 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1794 uint64_t Rsrc =
TII->getDefaultRsrcDataFormat() |
1807bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1809 if (Subtarget->hasRestrictedSOffset() &&
isNullConstant(ByteOffsetNode)) {
1810 SOffset =
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1814 SOffset = ByteOffsetNode;
1832bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
SDNode *
N,
SDValue Addr,
1834 uint64_t FlatVariant)
const {
1835 int64_t OffsetVal = 0;
1839 bool CanHaveFlatSegmentOffsetBug =
1840 Subtarget->hasFlatSegmentOffsetBug() &&
1844 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1846 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1848 isFlatScratchBaseLegal(Addr))) {
1857 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1858 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1860 OffsetVal = COffsetVal;
1873 uint64_t RemainderOffset;
1875 std::tie(OffsetVal, RemainderOffset) =
1876 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1879 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1886 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1887 if (Subtarget->hasAddNoCarryInsts()) {
1888 AddOp = AMDGPU::V_ADD_U32_e64;
1897 CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
1899 CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
1901 SDNode *N0Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1902 DL, MVT::i32, N0, Sub0);
1903 SDNode *N0Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1904 DL, MVT::i32, N0, Sub1);
1907 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1909 SDVTList VTs =
CurDAG->getVTList(MVT::i32, MVT::i1);
1912 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64,
DL, VTs,
1913 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1915 SDNode *Addc =
CurDAG->getMachineNode(
1916 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1920 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID,
DL,
1925 MVT::i64, RegSequenceArgs),
1934 Offset =
CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1938bool AMDGPUDAGToDAGISel::SelectFlatOffset(
SDNode *
N,
SDValue Addr,
1944bool AMDGPUDAGToDAGISel::SelectGlobalOffset(
SDNode *
N,
SDValue Addr,
1950bool AMDGPUDAGToDAGISel::SelectScratchOffset(
SDNode *
N,
SDValue Addr,
1953 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1961 if (
Op.getValueType() == MVT::i32)
1976bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
1979 bool NeedIOffset)
const {
1980 int64_t ImmOffset = 0;
1981 ScaleOffset =
false;
1987 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
1989 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1995 ImmOffset = COffsetVal;
1996 }
else if (!
LHS->isDivergent()) {
1997 if (COffsetVal > 0) {
2002 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2004 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2008 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
2010 SDNode *VMov =
CurDAG->getMachineNode(
2011 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2012 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2015 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2025 unsigned NumLiterals =
2026 !
TII->isInlineConstant(APInt(32,
Lo_32(COffsetVal))) +
2027 !
TII->isInlineConstant(APInt(32,
Hi_32(COffsetVal)));
2028 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2037 if (!
LHS->isDivergent()) {
2040 ScaleOffset = SelectScaleOffset(
N,
RHS, Subtarget->hasSignedGVSOffset());
2042 RHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2049 if (!SAddr && !
RHS->isDivergent()) {
2051 ScaleOffset = SelectScaleOffset(
N,
LHS, Subtarget->hasSignedGVSOffset());
2053 LHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2060 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2065 if (Subtarget->hasScaleOffset() &&
2066 (Addr.
getOpcode() == (Subtarget->hasSignedGVSOffset()
2081 Offset =
CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2094 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2095 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2097 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2101bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2106 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2114bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(
SDNode *
N,
SDValue Addr,
2119 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2124 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2130bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(
SDNode *
N,
SDValue Addr,
2136 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2141 N->getConstantOperandVal(
N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2147bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(
SDNode *
N,
SDValue Addr,
2152 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2156 CPol =
CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2160bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(
SDNode *
N,
SDValue Addr,
2166 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2172 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2178bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(
SDNode *
N,
SDValue Addr,
2184 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2205 FI->getValueType(0));
2215bool AMDGPUDAGToDAGISel::SelectScratchSAddr(
SDNode *Parent,
SDValue Addr,
2223 int64_t COffsetVal = 0;
2225 if (
CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2234 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2238 int64_t SplitImmOffset, RemainderOffset;
2239 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2242 COffsetVal = SplitImmOffset;
2246 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
2247 :
CurDAG->getSignedTargetConstant(RemainderOffset,
DL, MVT::i32);
2248 SAddr =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_ADD_I32,
DL, MVT::i32,
2253 Offset =
CurDAG->getSignedTargetConstant(COffsetVal,
DL, MVT::i32);
2259bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2261 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2267 KnownBits VKnown =
CurDAG->computeKnownBits(VAddr);
2274 return (VMax & 3) + (
SMax & 3) >= 4;
2277bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(
SDNode *
N,
SDValue Addr,
2281 int64_t ImmOffset = 0;
2285 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2287 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2292 ImmOffset = COffsetVal;
2293 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
2297 int64_t SplitImmOffset, RemainderOffset;
2298 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2302 SDNode *VMov =
CurDAG->getMachineNode(
2303 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2304 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2307 if (!isFlatScratchBaseLegal(Addr))
2309 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2311 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2312 CPol =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2324 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
2327 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
2334 if (OrigAddr != Addr) {
2335 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2338 if (!isFlatScratchBaseLegalSV(OrigAddr))
2342 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2345 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2347 bool ScaleOffset = SelectScaleOffset(
N, VAddr,
true );
2356bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(
SDValue *SOffset,
2359 int64_t ImmOffset)
const {
2360 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2362 KnownBits SKnown =
CurDAG->computeKnownBits(*SOffset);
2374 bool IsSigned)
const {
2375 bool ScaleOffset =
false;
2376 if (!Subtarget->hasScaleOffset() || !
Offset)
2390 (IsSigned &&
Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2391 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2392 (
Offset.isMachineOpcode() &&
2393 Offset.getMachineOpcode() ==
2394 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2395 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2397 ScaleOffset =
C->getZExtValue() ==
Size;
2409bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDNode *
N,
SDValue ByteOffsetNode,
2411 bool Imm32Only,
bool IsBuffer,
2412 bool HasSOffset, int64_t ImmOffset,
2413 bool *ScaleOffset)
const {
2415 "Cannot match both soffset and offset at the same time!");
2420 *ScaleOffset = SelectScaleOffset(
N, ByteOffsetNode,
false );
2430 *SOffset = ByteOffsetNode;
2431 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2437 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2444 SDLoc SL(ByteOffsetNode);
2448 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2450 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2451 if (EncodedOffset &&
Offset && !Imm32Only) {
2452 *
Offset =
CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2461 if (EncodedOffset &&
Offset && Imm32Only) {
2462 *
Offset =
CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2470 SDValue C32Bit =
CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2472 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2479SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(
SDValue Addr)
const {
2486 const MachineFunction &
MF =
CurDAG->getMachineFunction();
2487 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
2488 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2489 SDValue AddrHi =
CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2492 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2494 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2495 SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2497 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2500 return SDValue(
CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2507bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(
SDNode *
N,
SDValue Addr,
2510 bool IsBuffer,
bool HasSOffset,
2512 bool *ScaleOffset)
const {
2514 assert(!Imm32Only && !IsBuffer);
2517 if (!SelectSMRDBaseOffset(
N, Addr,
B,
nullptr,
Offset,
false,
false,
true))
2522 ImmOff =
C->getSExtValue();
2524 return SelectSMRDBaseOffset(
N,
B, SBase, SOffset,
nullptr,
false,
false,
2525 true, ImmOff, ScaleOffset);
2545 if (SelectSMRDOffset(
N, N1, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2546 ImmOffset, ScaleOffset)) {
2550 if (SelectSMRDOffset(
N, N0, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2551 ImmOffset, ScaleOffset)) {
2560 bool Imm32Only,
bool *ScaleOffset)
const {
2561 if (SelectSMRDBaseOffset(
N, Addr, SBase, SOffset,
Offset, Imm32Only,
2564 SBase = Expand32BitAddress(SBase);
2569 SBase = Expand32BitAddress(Addr);
2570 *
Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2577bool AMDGPUDAGToDAGISel::SelectSMRDImm(
SDValue Addr,
SDValue &SBase,
2579 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2583bool AMDGPUDAGToDAGISel::SelectSMRDImm32(
SDValue Addr,
SDValue &SBase,
2586 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2593 if (!SelectSMRD(
N, Addr, SBase, &SOffset,
nullptr,
2594 false, &ScaleOffset))
2598 SDLoc(
N), MVT::i32);
2602bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(
SDNode *
N,
SDValue Addr,
2607 if (!SelectSMRD(
N, Addr, SBase, &SOffset, &
Offset,
false, &ScaleOffset))
2611 SDLoc(
N), MVT::i32);
2616 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2620bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2623 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2627bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2631 return N.getValueType() == MVT::i32 &&
2632 SelectSMRDBaseOffset(
nullptr,
N, SOffset,
2637bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2642 if (
CurDAG->isBaseWithConstantOffset(Index)) {
2667SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2671 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2675 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, Off, W);
2677 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2681 uint32_t PackedVal =
Offset | (Width << 16);
2682 SDValue PackedConst =
CurDAG->getTargetConstant(PackedVal,
DL, MVT::i32);
2684 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, PackedConst);
2687void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2692 const SDValue &Shl =
N->getOperand(0);
2697 uint32_t BVal =
B->getZExtValue();
2698 uint32_t CVal =
C->getZExtValue();
2700 if (0 < BVal && BVal <= CVal && CVal < 32) {
2710void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2711 switch (
N->getOpcode()) {
2713 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2716 const SDValue &Srl =
N->getOperand(0);
2720 if (Shift && Mask) {
2722 uint32_t MaskVal =
Mask->getZExtValue();
2734 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2741 if (Shift && Mask) {
2743 uint32_t MaskVal =
Mask->getZExtValue() >> ShiftVal;
2752 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2753 SelectS_BFEFromShifts(
N);
2758 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2759 SelectS_BFEFromShifts(
N);
2774 unsigned Width =
cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2784bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2786 if (!
N->hasOneUse())
2796 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2800 if (VT == MVT::i64) {
2803 Subtarget->hasScalarCompareEq64();
2806 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2839void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2842 if (
Cond.isUndef()) {
2843 CurDAG->SelectNodeTo(
N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2844 N->getOperand(2),
N->getOperand(0));
2848 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2850 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2851 bool AndExec = !UseSCCBr;
2852 bool Negate =
false;
2855 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2870 bool NegatedBallot =
false;
2873 UseSCCBr = !BallotCond->isDivergent();
2874 Negate = Negate ^ NegatedBallot;
2889 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2890 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2891 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2910 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2912 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
2920 CurDAG->SelectNodeTo(
N, BrOp, MVT::Other,
2925void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2926 if (Subtarget->hasSALUFloatInsts() &&
N->getValueType(0) == MVT::f32 &&
2927 !
N->isDivergent()) {
2929 if (Src.getValueType() == MVT::f16) {
2931 CurDAG->SelectNodeTo(
N, AMDGPU::S_CVT_HI_F32_F16,
N->getVTList(),
2941void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2944 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2945 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2950 MachineMemOperand *MMO =
M->getMemOperand();
2954 if (
CurDAG->isBaseWithConstantOffset(Ptr)) {
2959 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
2960 N = glueCopyToM0(
N, PtrBase);
2961 Offset =
CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2966 N = glueCopyToM0(
N, Ptr);
2967 Offset =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2972 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2977 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
2983void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N,
unsigned IntrID) {
2986 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2987 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2988 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2990 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2991 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2993 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2994 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2997 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
2998 N->getOperand(5),
N->getOperand(0)};
3001 MachineMemOperand *MMO =
M->getMemOperand();
3002 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3006void AMDGPUDAGToDAGISel::SelectTensorLoadStore(
SDNode *
N,
unsigned IntrID) {
3007 bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
3009 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS : AMDGPU::TENSOR_STORE_FROM_LDS;
3021 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_D2
3022 : AMDGPU::TENSOR_STORE_FROM_LDS_D2;
3034 (void)
CurDAG->SelectNodeTo(
N,
Opc, MVT::Other, TensorOps);
3039 case Intrinsic::amdgcn_ds_gws_init:
3040 return AMDGPU::DS_GWS_INIT;
3041 case Intrinsic::amdgcn_ds_gws_barrier:
3042 return AMDGPU::DS_GWS_BARRIER;
3043 case Intrinsic::amdgcn_ds_gws_sema_v:
3044 return AMDGPU::DS_GWS_SEMA_V;
3045 case Intrinsic::amdgcn_ds_gws_sema_br:
3046 return AMDGPU::DS_GWS_SEMA_BR;
3047 case Intrinsic::amdgcn_ds_gws_sema_p:
3048 return AMDGPU::DS_GWS_SEMA_P;
3049 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3050 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3056void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
3057 if (!Subtarget->hasGWS() ||
3058 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3059 !Subtarget->hasGWSSemaReleaseAll())) {
3066 const bool HasVSrc =
N->getNumOperands() == 4;
3067 assert(HasVSrc ||
N->getNumOperands() == 3);
3070 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
3073 MachineMemOperand *MMO =
M->getMemOperand();
3086 glueCopyToM0(
N,
CurDAG->getTargetConstant(0, SL, MVT::i32));
3087 ImmOffset = ConstOffset->getZExtValue();
3089 if (
CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3098 =
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3102 =
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3104 CurDAG->getTargetConstant(16, SL, MVT::i32));
3105 glueCopyToM0(
N,
SDValue(M0Base, 0));
3109 SDValue OffsetField =
CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3113 const MCInstrDesc &InstrDesc =
TII->get(
Opc);
3114 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
3116 const TargetRegisterClass *DataRC =
TII->getRegClass(InstrDesc, Data0Idx);
3120 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3123 MVT DataVT =
Data.getValueType().getSimpleVT();
3124 if (
TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3126 Ops.push_back(
N->getOperand(2));
3132 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3134 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3136 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3139 SL, MVT::v2i32, RegSeqOps),
3144 Ops.push_back(OffsetField);
3145 Ops.push_back(Chain);
3147 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3151void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
3152 if (Subtarget->getLDSBankCount() != 16) {
3182 SDVTList VTs =
CurDAG->getVTList(MVT::f32, MVT::Other);
3185 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32,
DL, VTs, {
3186 CurDAG->getTargetConstant(2,
DL, MVT::i32),
3192 SDNode *InterpP1LV =
3193 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16,
DL, MVT::f32, {
3194 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3198 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3201 CurDAG->getTargetConstant(0,
DL, MVT::i1),
3202 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3209void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
3210 unsigned IntrID =
N->getConstantOperandVal(1);
3212 case Intrinsic::amdgcn_ds_append:
3213 case Intrinsic::amdgcn_ds_consume: {
3214 if (
N->getValueType(0) != MVT::i32)
3216 SelectDSAppendConsume(
N, IntrID);
3219 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3220 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3221 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3222 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3223 SelectDSBvhStackIntrinsic(
N, IntrID);
3225 case Intrinsic::amdgcn_init_whole_wave:
3226 CurDAG->getMachineFunction()
3227 .getInfo<SIMachineFunctionInfo>()
3228 ->setInitWholeWave();
3235void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
3236 unsigned IntrID =
N->getConstantOperandVal(0);
3237 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3238 SDNode *ConvGlueNode =
N->getGluedNode();
3244 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3245 MVT::Glue,
SDValue(ConvGlueNode, 0));
3247 ConvGlueNode =
nullptr;
3250 case Intrinsic::amdgcn_wqm:
3251 Opcode = AMDGPU::WQM;
3253 case Intrinsic::amdgcn_softwqm:
3254 Opcode = AMDGPU::SOFT_WQM;
3256 case Intrinsic::amdgcn_wwm:
3257 case Intrinsic::amdgcn_strict_wwm:
3258 Opcode = AMDGPU::STRICT_WWM;
3260 case Intrinsic::amdgcn_strict_wqm:
3261 Opcode = AMDGPU::STRICT_WQM;
3263 case Intrinsic::amdgcn_interp_p1_f16:
3264 SelectInterpP1F16(
N);
3266 case Intrinsic::amdgcn_permlane16_swap:
3267 case Intrinsic::amdgcn_permlane32_swap: {
3268 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3269 !Subtarget->hasPermlane16Swap()) ||
3270 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3271 !Subtarget->hasPermlane32Swap())) {
3276 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3277 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3278 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3282 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3284 bool FI =
N->getConstantOperandVal(3);
3285 NewOps[2] =
CurDAG->getTargetConstant(
3288 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), NewOps);
3296 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3298 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), {Src});
3303 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3304 CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(), NewOps);
3308void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
3309 unsigned IntrID =
N->getConstantOperandVal(1);
3311 case Intrinsic::amdgcn_ds_gws_init:
3312 case Intrinsic::amdgcn_ds_gws_barrier:
3313 case Intrinsic::amdgcn_ds_gws_sema_v:
3314 case Intrinsic::amdgcn_ds_gws_sema_br:
3315 case Intrinsic::amdgcn_ds_gws_sema_p:
3316 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3317 SelectDS_GWS(
N, IntrID);
3319 case Intrinsic::amdgcn_tensor_load_to_lds:
3320 case Intrinsic::amdgcn_tensor_store_from_lds:
3321 SelectTensorLoadStore(
N, IntrID);
3330void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
3332 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(
N), MVT::i32);
3333 CurDAG->SelectNodeTo(
N, AMDGPU::S_LSHR_B32,
N->getVTList(),
3334 {N->getOperand(0), Log2WaveSize});
3337void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
3352 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
3354 if (
N->isDivergent()) {
3355 SrcVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
3360 CopyVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3361 {SrcVal, Log2WaveSize}),
3365 SDValue CopyToSP =
CurDAG->getCopyToReg(
N->getOperand(0), SL,
SP, CopyVal);
3369bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
3371 bool IsCanonicalizing,
3372 bool AllowAbs)
const {
3378 Src = Src.getOperand(0);
3379 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
3383 if (
LHS &&
LHS->isZero()) {
3385 Src = Src.getOperand(1);
3389 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
3391 Src = Src.getOperand(0);
3404 if (IsCanonicalizing)
3419 EVT VT = Src.getValueType();
3421 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3428 auto ReplaceSrc = [&]() ->
SDValue {
3430 return Src.getOperand(0);
3435 Src.getValueType(),
LHS, Index);
3461 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
3463 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3470bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
3473 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
3475 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3482bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
3485 if (SelectVOP3ModsImpl(In, Src, Mods,
3488 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3495bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
3503bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
3507 if (SelectVOP3ModsImpl(In, Src, Mods,
3512 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3519bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
3521 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
3524bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
3526 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
3529bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
3533 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3534 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3536 return SelectVOP3Mods(In, Src, SrcMods);
3539bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
3543 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3544 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3546 return SelectVOP3BMods(In, Src, SrcMods);
3549bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
3554 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3555 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3560bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
3561 SDValue &SrcMods,
bool IsDOT)
const {
3568 Src = Src.getOperand(0);
3572 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3573 unsigned VecMods = Mods;
3575 SDValue Lo = stripBitcast(Src.getOperand(0));
3576 SDValue Hi = stripBitcast(Src.getOperand(1));
3579 Lo = stripBitcast(
Lo.getOperand(0));
3584 Hi = stripBitcast(
Hi.getOperand(0));
3594 unsigned VecSize = Src.getValueSizeInBits();
3595 Lo = stripExtractLoElt(
Lo);
3596 Hi = stripExtractLoElt(
Hi);
3598 if (
Lo.getValueSizeInBits() > VecSize) {
3599 Lo =
CurDAG->getTargetExtractSubreg(
3600 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3604 if (
Hi.getValueSizeInBits() > VecSize) {
3605 Hi =
CurDAG->getTargetExtractSubreg(
3606 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3610 assert(
Lo.getValueSizeInBits() <= VecSize &&
3611 Hi.getValueSizeInBits() <= VecSize);
3613 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
3617 if (VecSize ==
Lo.getValueSizeInBits()) {
3619 }
else if (VecSize == 32) {
3620 Src = createVOP3PSrc32FromLo16(
Lo, Src,
CurDAG, Subtarget);
3622 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3626 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3627 Lo.getValueType()), 0);
3628 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3629 : AMDGPU::SReg_64RegClassID;
3631 CurDAG->getTargetConstant(RC, SL, MVT::i32),
3632 Lo,
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3633 Undef,
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3635 Src =
SDValue(
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3636 Src.getValueType(),
Ops), 0);
3638 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3644 .bitcastToAPInt().getZExtValue();
3646 Src =
CurDAG->getTargetConstant(
Lit, SDLoc(In), MVT::i64);
3647 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3654 Src.getNumOperands() == 2) {
3660 ArrayRef<int>
Mask = SVN->getMask();
3662 if (Mask[0] < 2 && Mask[1] < 2) {
3664 SDValue ShuffleSrc = SVN->getOperand(0);
3677 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3685 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3689bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3691 return SelectVOP3PMods(In, Src, SrcMods,
true);
3694bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3697 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3700 unsigned SrcVal =
C->getZExtValue();
3704 Src =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3711 unsigned DstRegClass;
3713 switch (Elts.
size()) {
3715 DstRegClass = AMDGPU::VReg_256RegClassID;
3719 DstRegClass = AMDGPU::VReg_128RegClassID;
3723 DstRegClass = AMDGPU::VReg_64RegClassID;
3732 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3733 Ops.push_back(Elts[i]);
3744 assert(
"unhandled Reg sequence size" &&
3745 (Elts.
size() == 8 || Elts.
size() == 16));
3749 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3750 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3758 {Elts[i + 1], Elts[i], PackLoLo});
3768 const SDLoc &
DL,
unsigned ElementSize) {
3769 if (ElementSize == 16)
3771 if (ElementSize == 32)
3779 unsigned ElementSize) {
3784 for (
auto El : Elts) {
3787 NegAbsElts.
push_back(El->getOperand(0));
3789 if (Elts.size() != NegAbsElts.
size()) {
3809 std::function<
bool(
SDValue)> ModifierCheck) {
3813 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3814 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3815 if (!ModifierCheck(ElF16))
3822bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3865 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3869bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3880 if (EltsF16.
empty())
3901 if (EltsV2F16.
empty())
3914 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3918bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
3928 unsigned ModOpcode =
3943 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3947bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
3949 BitVector UndefElements;
3951 if (isInlineImmediate(
Splat.getNode())) {
3953 unsigned Imm =
C->getAPIntValue().getSExtValue();
3954 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3958 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
3959 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3967 SDValue SplatSrc32 = stripBitcast(In);
3969 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3970 SDValue SplatSrc16 = stripBitcast(Splat32);
3973 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
3974 std::optional<APInt> RawValue;
3976 RawValue =
C->getValueAPF().bitcastToAPInt();
3978 RawValue =
C->getAPIntValue();
3980 if (RawValue.has_value()) {
3981 EVT VT =
In.getValueType().getScalarType();
3987 if (
TII->isInlineConstant(FloatVal)) {
3988 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
3993 if (
TII->isInlineConstant(RawValue.value())) {
3994 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4007bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
4013 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4022 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4026bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
4032 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4041 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4045bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(
SDValue In,
SDValue &Src,
4053 const SDValue &ExtendSrc =
In.getOperand(0);
4057 const SDValue &CastSrc =
In.getOperand(0);
4061 if (Zero &&
Zero->getZExtValue() == 0)
4072 Src = ExtractVecEltSrc;
4076 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4080bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
4084 SrcMods =
CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4088bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
4091 return SelectVOP3Mods(In, Src, SrcMods);
4103 Op =
Op.getOperand(0);
4105 IsExtractHigh =
false;
4108 if (!Low16 || !Low16->isZero())
4110 Op = stripBitcast(
Op.getOperand(1));
4111 if (
Op.getValueType() != MVT::bf16)
4116 if (
Op.getValueType() != MVT::i32)
4121 if (Mask->getZExtValue() == 0xffff0000) {
4122 IsExtractHigh =
true;
4123 return Op.getOperand(0);
4132 return Op.getOperand(0);
4141bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
4145 SelectVOP3ModsImpl(In, Src, Mods);
4147 bool IsExtractHigh =
false;
4149 Src = Src.getOperand(0);
4150 }
else if (VT == MVT::bf16) {
4158 if (Src.getValueType() != VT &&
4159 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
4162 Src = stripBitcast(Src);
4168 SelectVOP3ModsImpl(Src, Src, ModsTmp);
4183 if (Src.getValueSizeInBits() == 16) {
4192 Src.getOperand(0).getValueType() == MVT::i32) {
4193 Src = Src.getOperand(0);
4197 if (Subtarget->useRealTrue16Insts())
4199 Src = createVOP3PSrc32FromLo16(Src, In,
CurDAG, Subtarget);
4200 }
else if (IsExtractHigh)
4206bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
4209 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4211 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4215bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
4218 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4219 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4223bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(
SDValue In,
SDValue &Src,
4226 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4228 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4232bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(
SDValue In,
SDValue &Src,
4235 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4236 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4244 unsigned NumOpcodes = 0;
4257 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4260 if (
C->isAllOnes()) {
4270 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4284 if (Src.size() == 3) {
4290 if (
C->isAllOnes()) {
4292 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4293 if (Src[
I] ==
LHS) {
4305 Bits = SrcBits[Src.size()];
4310 switch (In.getOpcode()) {
4318 if (!getOperandBits(
LHS, LHSBits) ||
4319 !getOperandBits(
RHS, RHSBits)) {
4320 Src = std::move(Backup);
4321 return std::make_pair(0, 0);
4327 NumOpcodes +=
Op.first;
4328 LHSBits =
Op.second;
4333 NumOpcodes +=
Op.first;
4334 RHSBits =
Op.second;
4339 return std::make_pair(0, 0);
4343 switch (In.getOpcode()) {
4345 TTbl = LHSBits & RHSBits;
4348 TTbl = LHSBits | RHSBits;
4351 TTbl = LHSBits ^ RHSBits;
4357 return std::make_pair(NumOpcodes + 1, TTbl);
4364 unsigned NumOpcodes;
4366 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(In, Src);
4370 if (NumOpcodes < 2 || Src.empty())
4376 if (NumOpcodes < 4 && !In->isDivergent())
4379 if (NumOpcodes == 2 &&
In.getValueType() == MVT::i32) {
4384 (
In.getOperand(0).getOpcode() ==
In.getOpcode() ||
4385 In.getOperand(1).getOpcode() ==
In.getOpcode()))
4399 while (Src.size() < 3)
4400 Src.push_back(Src[0]);
4406 Tbl =
CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4412 return CurDAG->getUNDEF(MVT::i32);
4416 return CurDAG->getConstant(
C->getZExtValue() << 16, SL, MVT::i32);
4421 return CurDAG->getConstant(
4422 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
4432bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
4433 assert(
CurDAG->getTarget().getTargetTriple().isAMDGCN());
4435 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
4436 const SIInstrInfo *SII = Subtarget->getInstrInfo();
4439 bool AllUsesAcceptSReg =
true;
4441 Limit < 10 && U !=
E; ++U, ++Limit) {
4442 const TargetRegisterClass *RC =
4443 getOperandRegClass(
U->getUser(),
U->getOperandNo());
4451 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
4452 RC != &AMDGPU::VS_64_Align2RegClass) {
4453 AllUsesAcceptSReg =
false;
4454 SDNode *
User =
U->getUser();
4455 if (
User->isMachineOpcode()) {
4456 unsigned Opc =
User->getMachineOpcode();
4457 const MCInstrDesc &
Desc = SII->get(
Opc);
4458 if (
Desc.isCommutable()) {
4459 unsigned OpIdx =
Desc.getNumDefs() +
U->getOperandNo();
4462 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
4463 const TargetRegisterClass *CommutedRC =
4464 getOperandRegClass(
U->getUser(), CommutedOpNo);
4465 if (CommutedRC == &AMDGPU::VS_32RegClass ||
4466 CommutedRC == &AMDGPU::VS_64RegClass ||
4467 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
4468 AllUsesAcceptSReg =
true;
4476 if (!AllUsesAcceptSReg)
4480 return !AllUsesAcceptSReg && (Limit < 10);
4483bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
4485 const MachineMemOperand *MMO = Ld->getMemOperand();
4503 (Subtarget->getScalarizeGlobalBehavior() &&
4507 ->isMemOpHasNoClobberedMemOperand(
N)));
4513 bool IsModified =
false;
4519 while (Position !=
CurDAG->allnodes_end()) {
4526 if (ResNode !=
Node) {
4532 CurDAG->RemoveDeadNodes();
4533 }
while (IsModified);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static MachineSDNode * buildRegSequence32(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
static MemSDNode * findMemSDNode(SDNode *N)
static MachineSDNode * buildRegSequence16(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void SelectVectorShuffle(SDNode *N)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static SDValue stripBitcast(SDValue Val)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information that they claim to use by overriding the getAnalysisUsage function.
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an ISD::OR with a ConstantSDNode that is guaranteed to have the same semantics as an ADD.
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a result.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to memory with one type and loaded from the same address with the other type.
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ CONVERGENCECTRL_GLUE
This does not correspond to any convergence control intrinsic.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value, and a value.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (32 bit version).
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.