29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
In = stripBitcast(In);
Out = In.getOperand(0);
if (ShiftAmt->getZExtValue() == 16) {
if (Lo->isDivergent()) {
SL, Lo.getValueType()),
Src.getValueType(), Ops),
SDValue Idx = In.getOperand(1);
  return In.getOperand(0);
SDValue Src = In.getOperand(0);
if (Src.getValueType().getSizeInBits() == 32)
  return stripBitcast(Src);
135 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
140#ifdef EXPENSIVE_CHECKS
145 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
case ISD::FNEARBYINT:
case ISD::FROUNDEVEN:
#ifdef EXPENSIVE_CHECKS
assert(L->isLCSSAForm(DT));
#ifdef EXPENSIVE_CHECKS
assert(Subtarget->d16PreservesUnusedBits());
MVT VT = N->getValueType(0).getSimpleVT();
if (VT != MVT::v2i16 && VT != MVT::v2f16)
284 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
297 if (LdLo &&
Lo.hasOneUse()) {
311 TiedIn =
CurDAG->getNode(ISD::BITCAST,
SDLoc(
N), VT, TiedIn);
318 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
331 if (!Subtarget->d16PreservesUnusedBits())
336 bool MadeChange =
false;
337 while (Position !=
CurDAG->allnodes_begin()) {
342 switch (
N->getOpcode()) {
353 CurDAG->RemoveDeadNodes();
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
return TII->isInlineConstant(C->getAPIntValue());
return TII->isInlineConstant(C->getValueAPF());
unsigned OpNo) const {
if (!N->isMachineOpcode()) {
if (Reg.isVirtual()) {
return MRI.getRegClass(Reg);
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
return TRI->getPhysRegBaseClass(Reg);
switch (N->getMachineOpcode()) {
const MCInstrDesc &Desc =
    Subtarget->getInstrInfo()->get(N->getMachineOpcode());
unsigned OpIdx = Desc.getNumDefs() + OpNo;
int RegClass = Desc.operands()[OpIdx].RegClass;
return Subtarget->getRegisterInfo()->getRegClass(RegClass);
case AMDGPU::REG_SEQUENCE: {
unsigned RCID = N->getConstantOperandVal(0);
const TargetRegisterClass *SuperRC =
    Subtarget->getRegisterInfo()->getRegClass(RCID);
SDValue SubRegOp = N->getOperand(OpNo + 1);
return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
Ops.push_back(NewChain);
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
  Ops.push_back(N->getOperand(i));
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
return glueCopyToOp(N, M0, M0.getValue(1));
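// LDS/GDS instructions implicitly read the M0 register; glueCopyToM0
// (above) splices a CopyToReg of M0 into N's chain and glue so the
// scheduler cannot separate the M0 write from the memory operation that
// consumes it.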
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
if (Subtarget->ldsRequiresM0Init())
N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32));
MachineFunction &MF = CurDAG->getMachineFunction();
unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
SDNode *Lo = CurDAG->getMachineNode(
    AMDGPU::S_MOV_B32, DL, MVT::i32,
SDNode *Hi = CurDAG->getMachineNode(
    AMDGPU::S_MOV_B32, DL, MVT::i32,
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
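// The fragment above materializes a 64-bit scalar immediate as two
// S_MOV_B32 halves stitched into an SReg_64 with a REG_SEQUENCE.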
EVT VT = N->getValueType(0);
SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
if (NumVectorElts == 1) {
  CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
bool AllConst = true;
for (unsigned I = 0; I < NumVectorElts; ++I) {
Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
C |= Val << (EltSize * I);
CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
if (NOps != NumVectorElts) {
for (unsigned i = NOps; i < NumVectorElts; ++i) {
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
EVT VT = N->getValueType(0);
if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) ||
Mask[0] < 4 && Mask[1] < 4);
SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
Src0SubReg = Src1SubReg;
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
Src1SubReg = Src0SubReg;
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
    Src1SubReg == AMDGPU::sub0) {
SDValue Src0OpSelVal = CurDAG->getTargetConstant(Src0OpSel, DL, MVT::i32);
SDValue Src1OpSelVal = CurDAG->getTargetConstant(Src1OpSel, DL, MVT::i32);
CurDAG->SelectNodeTo(N, AMDGPU::V_PK_MOV_B32, N->getVTList(),
                     {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
CurDAG->getTargetExtractSubreg(Src0SubReg, DL, EltVT, VSrc0);
CurDAG->getTargetExtractSubreg(Src1SubReg, DL, EltVT, VSrc1);
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
ResultElt0, CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
ResultElt1, CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
CurDAG->SelectNodeTo(N, TargetOpcode::REG_SEQUENCE, VT, Ops);
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
N = glueCopyToM0LDSInit(N);
if (N->getValueType(0) != MVT::i64)
SelectADD_SUB_I64(N);
if (N->getValueType(0) != MVT::i32)
SelectUADDO_USUBO(N);
SelectFMUL_W_CHAIN(N);
SelectFMA_W_CHAIN(N);
EVT VT = N->getValueType(0);
unsigned RegClassID =
if (N->getValueType(0) == MVT::i128) {
  RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
} else if (N->getValueType(0) == MVT::i64) {
  RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                        N->getOperand(1), SubReg1 };
N->getValueType(0), Ops));
if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
    Subtarget->has64BitLiterals())
Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
Imm = C->getZExtValue();
return SelectMUL_LOHI(N);
if (N->getValueType(0) != MVT::i32)
if (N->getValueType(0) == MVT::i32) {
{ N->getOperand(0), N->getOperand(1) });
SelectINTRINSIC_W_CHAIN(N);
SelectINTRINSIC_WO_CHAIN(N);
SelectINTRINSIC_VOID(N);
SelectWAVE_ADDRESS(N);
case ISD::STACKRESTORE: {
SelectSTACKRESTORE(N);
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
return Term->getMetadata("amdgpu.uniform") ||
       Term->getMetadata("structurizecfg.uniform");
bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
const APInt &RHS = N->getConstantOperandAPInt(1);
if (RHS.countr_one() >= ShAmtBits)
N1 = Lo.getOperand(1);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
#ifdef EXPENSIVE_CHECKS
for (auto &L : LI.getLoopsInPreorder())
  assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
SDNode *Mov = CurDAG->getMachineNode(
    AMDGPU::S_MOV_B32, DL, MVT::i32,
    CurDAG->getTargetConstant(Val, DL, MVT::i32));
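// A 64-bit integer add/sub has no single-instruction form here: the operands
// are split into sub0/sub1 halves, the low halves are combined with a
// carry-producing add/sub, and the high halves with the matching
// carry-consuming opcode. OpcMap below is indexed as
// [consumes-carry][divergent][is-add] to pick the SALU or VALU variant.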
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                     DL, MVT::i32, LHS, Sub0);
SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                     DL, MVT::i32, LHS, Sub1);
SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                     DL, MVT::i32, RHS, Sub0);
SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                     DL, MVT::i32, RHS, Sub1);
SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
static const unsigned OpcMap[2][2][2] = {
    {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
     {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
    {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
     {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
if (!ConsumeCarry) {
  AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
MVT::i64, RegSequenceArgs);
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
if (N->isDivergent()) {
: AMDGPU::V_SUBB_U32_e64;
N, Opc, N->getVTList(),
CurDAG->getTargetConstant(0, {}, MVT::i1) });
: AMDGPU::S_SUB_CO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
bool IsVALU = N->isDivergent();
for (SDNode::user_iterator UI = N->user_begin(), E = N->user_end(); UI != E;
  if (UI.getUse().getResNo() == 1) {
    if (UI->isMachineOpcode()) {
      if (UI->getMachineOpcode() !=
          (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
N, Opc, N->getVTList(),
{N->getOperand(0), N->getOperand(1),
 CurDAG->getTargetConstant(0, {}, MVT::i1) });
unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                            : AMDGPU::S_USUBO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                     {N->getOperand(0), N->getOperand(1)});
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
bool UseFMAC = Subtarget->hasDLInsts() &&
unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
Ops[6] = N->getOperand(0);
Ops[7] = N->getOperand(3);
CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
EVT VT = N->getValueType(0);
assert(VT == MVT::f32 || VT == MVT::f64);
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64
                   : AMDGPU::V_DIV_SCALE_F32_e64;
SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && !N->hasAnyUseOfValue(1);
if (Subtarget->hasMADIntraFwdBug())
  Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
               : AMDGPU::V_MAD_U64_U32_gfx11_e64;
else if (UseNoCarry)
  Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
MachineSDNode *Mad = CurDAG->getMachineNode(Opc, SL, MVT::i64, Ops);
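// SelectMUL_LOHI (below) reuses the 64-bit MAD instructions for a
// 32x32->64 multiply; the low and high halves of the result are then
// peeled off with EXTRACT_SUBREG on sub0/sub1.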
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
if (Subtarget->hasMadU64U32NoCarry()) {
  VTList = CurDAG->getVTList(MVT::i64);
  Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
VTList = CurDAG->getVTList(MVT::i64, MVT::i1);
if (Subtarget->hasMADIntraFwdBug()) {
  Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
               : AMDGPU::V_MAD_U64_U32_gfx11_e64;
Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDNode *Mad = CurDAG->getMachineNode(Opc, SL, VTList, Ops);
SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                    MVT::i32, SDValue(Mad, 0), Sub0);
SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                    MVT::i32, SDValue(Mad, 0), Sub1);
if (!Base || Subtarget->hasUsableDSOffset() ||
    Subtarget->unsafeDSOffsetFoldingEnabled())
if (CurDAG->isBaseWithConstantOffset(Addr)) {
int64_t ByteOffset = C->getSExtValue();
if (isDSOffsetLegal(SDValue(), ByteOffset)) {
if (isDSOffsetLegal(Sub, ByteOffset)) {
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
  SubOp = AMDGPU::V_SUB_U32_e64;
CurDAG->getTargetConstant(0, {}, MVT::i1));
MachineSDNode *MachineSub =
    CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                DL, MVT::i32, Zero);
Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Size) const {
if (Offset0 % Size != 0 || Offset1 % Size != 0)
if (!Base || Subtarget->hasUsableDSOffset() ||
    Subtarget->unsafeDSOffsetFoldingEnabled())
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
if (Subtarget->hasSignedScratchOffsets())
ConstantSDNode *ImmOp = nullptr;
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
if (Subtarget->hasSignedScratchOffsets())
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
(RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
auto LHS = Base.getOperand(0);
auto RHS = Base.getOperand(1);
return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
unsigned Size) const {
if (CurDAG->isBaseWithConstantOffset(Addr)) {
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
if (const ConstantSDNode *C =
unsigned OffsetValue0 = C->getZExtValue();
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
  SubOp = AMDGPU::V_SUB_U32_e64;
CurDAG->getTargetConstant(0, {}, MVT::i1));
MachineSDNode *MachineSub = CurDAG->getMachineNode(
CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
unsigned OffsetValue0 = CAddr->getZExtValue();
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
MachineSDNode *MovZero =
    CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i32);
Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i32);
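// read2/write2 forms encode two per-element offsets, so the byte offsets
// are divided by the access Size before being placed in Offset0/Offset1;
// both byte offsets must be Size-aligned, which isDSOffset2Legal checks.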
1519 if (Subtarget->useFlatForGlobal())
1524 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1525 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1526 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1527 SOffset = Subtarget->hasRestrictedSOffset()
1528 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1529 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1531 ConstantSDNode *C1 =
nullptr;
1533 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1546 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1570 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1574 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1584 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1595 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1601bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1608 if (!Subtarget->hasAddr64())
1611 if (!SelectMUBUF(Addr,
Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1615 if (
C->getSExtValue()) {
1628std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1633 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1639 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
int64_t Imm = CAddr->getSExtValue();
const int64_t NullPtr =
if (Imm != NullPtr) {
CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
    AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (TII->isLegalMUBUFImmOffset(C1) &&
    (!Subtarget->privateMemoryResourceIsRangeChecked() ||
     CurDAG->SignBitIsZero(N0))) {
  std::tie(VAddr, SOffset) = foldFrameIndex(N0);
  ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);
std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
if (!Reg.isPhysical())
const auto *RC = TRI.getPhysRegBaseClass(Reg);
return RC && TRI.isSGPRClass(RC);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
ConstantSDNode *CAddr;
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
  SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
SOffset = ByteOffsetNode;
bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              uint64_t FlatVariant) const {
int64_t OffsetVal = 0;
bool CanHaveFlatSegmentOffsetBug =
    Subtarget->hasFlatSegmentOffsetBug() &&
if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
  if (isBaseWithConstantOffset64(Addr, N0, N1) &&
      isFlatScratchBaseLegal(Addr))) {
const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
  OffsetVal = COffsetVal;
uint64_t RemainderOffset;
std::tie(OffsetVal, RemainderOffset) =
    TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
  AddOp = AMDGPU::V_ADD_U32_e64;
Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                      DL, MVT::i32, N0, Sub0);
SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                      DL, MVT::i32, N0, Sub1);
getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                       {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
SDNode *Addc = CurDAG->getMachineNode(
    AMDGPU::V_ADDC_U32_e64, DL, VTs,
CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
MVT::i64, RegSequenceArgs),
Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
if (Op.getValueType() == MVT::i32)
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
                                           bool NeedIOffset) const {
int64_t ImmOffset = 0;
ScaleOffset = false;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
const SIInstrInfo *TII = Subtarget->getInstrInfo();
ImmOffset = COffsetVal;
} else if (!LHS->isDivergent()) {
if (COffsetVal > 0) {
int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
SDNode *VMov = CurDAG->getMachineNode(
    AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
    CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
unsigned NumLiterals =
    !TII->isInlineConstant(APInt(32, Lo_32(COffsetVal))) +
    !TII->isInlineConstant(APInt(32, Hi_32(COffsetVal)));
if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
if (!LHS->isDivergent()) {
ScaleOffset = SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset());
RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
if (!SAddr && !RHS->isDivergent()) {
ScaleOffset = SelectScaleOffset(N, LHS, Subtarget->hasSignedGVSOffset());
LHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
if (Subtarget->hasScaleOffset() &&
    (Addr.getOpcode() == (Subtarget->hasSignedGVSOffset()
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
                       CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(SDNode *N, SDValue Addr,
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(SDNode *N, SDValue Addr,
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
N->getConstantOperandVal(N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
CPol = CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(SDNode *N, SDValue Addr,
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(SDNode *N, SDValue Addr,
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
FI->getValueType(0));
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
int64_t COffsetVal = 0;
if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
const SIInstrInfo *TII = Subtarget->getInstrInfo();
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
COffsetVal = SplitImmOffset;
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
: CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32);
bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
if (!Subtarget->hasFlatScratchSVSSwizzleBug())
KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
return (VMax & 3) + (SMax & 3) >= 4;
bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
int64_t ImmOffset = 0;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
const SIInstrInfo *TII = Subtarget->getInstrInfo();
ImmOffset = COffsetVal;
} else if (!LHS->isDivergent() && COffsetVal > 0) {
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
SDNode *VMov = CurDAG->getMachineNode(
    AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
    CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
if (!isFlatScratchBaseLegal(Addr))
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
if (!LHS->isDivergent() && RHS->isDivergent()) {
} else if (!RHS->isDivergent() && LHS->isDivergent()) {
if (OrigAddr != Addr) {
  if (!isFlatScratchBaseLegalSVImm(OrigAddr))
if (!isFlatScratchBaseLegalSV(OrigAddr))
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
bool ScaleOffset = SelectScaleOffset(N, VAddr, true /* IsSigned */);
bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     int64_t ImmOffset) const {
if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
bool IsSigned) const {
bool ScaleOffset = false;
if (!Subtarget->hasScaleOffset() || !Offset)
(Offset.isMachineOpcode() &&
 Offset.getMachineOpcode() == (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
                                        : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
ScaleOffset = C->getZExtValue() == Size;
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode,
                                          bool Imm32Only, bool IsBuffer,
                                          bool HasSOffset, int64_t ImmOffset,
                                          bool *ScaleOffset) const {
"Cannot match both soffset and offset at the same time!");
*ScaleOffset = SelectScaleOffset(N, ByteOffsetNode, false /* IsSigned */);
*SOffset = ByteOffsetNode;
return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
SDLoc SL(ByteOffsetNode);
int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
*Subtarget, ByteOffset, IsBuffer, HasSOffset);
if (EncodedOffset && Offset && !Imm32Only) {
  *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
if (EncodedOffset && Offset && Imm32Only) {
  *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
const MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
unsigned AddrHiVal = Info->get32BitAddressHighBits();
SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDNode *N, SDValue Addr,
                                              bool IsBuffer, bool HasSOffset,
                                              bool *ScaleOffset) const {
assert(!Imm32Only && !IsBuffer);
if (!SelectSMRDBaseOffset(N, Addr, B, nullptr, Offset, false, false, true))
ImmOff = C->getSExtValue();
return SelectSMRDBaseOffset(N, B, SBase, SOffset, nullptr, false, false,
                            true, ImmOff, ScaleOffset);
if (SelectSMRDOffset(N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                     ImmOffset, ScaleOffset)) {
if (SelectSMRDOffset(N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                     ImmOffset, ScaleOffset)) {
bool Imm32Only, bool *ScaleOffset) const {
if (SelectSMRDBaseOffset(N, Addr, SBase, SOffset, Offset, Imm32Only,
SBase = Expand32BitAddress(SBase);
SBase = Expand32BitAddress(Addr);
*Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
return SelectSMRD(nullptr, Addr, SBase, nullptr,
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
return SelectSMRD(nullptr, Addr, SBase, nullptr,
if (!SelectSMRD(N, Addr, SBase, &SOffset, nullptr,
                false, &ScaleOffset))
SDLoc(N), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDNode *N, SDValue Addr,
if (!SelectSMRD(N, Addr, SBase, &SOffset, &Offset, false, &ScaleOffset))
SDLoc(N), MVT::i32);
return SelectSMRDOffset(nullptr, N, nullptr, &Offset,
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
return SelectSMRDOffset(nullptr, N, nullptr, &Offset,
bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
return N.getValueType() == MVT::i32 &&
       SelectSMRDBaseOffset(nullptr, N, SOffset,
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
if (CurDAG->isBaseWithConstantOffset(Index)) {
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
uint32_t PackedVal = Offset | (Width << 16);
SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
const SDValue &Shl = N->getOperand(0);
uint32_t BVal = B->getZExtValue();
uint32_t CVal = C->getZExtValue();
if (0 < BVal && BVal <= CVal && CVal < 32) {
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
switch (N->getOpcode()) {
if (N->getOperand(0).getOpcode() == ISD::SRL) {
const SDValue &Srl = N->getOperand(0);
if (Shift && Mask) {
uint32_t MaskVal = Mask->getZExtValue();
if (N->getOperand(0).getOpcode() == ISD::AND) {
if (Shift && Mask) {
uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
} else if (N->getOperand(0).getOpcode() == ISD::SHL) {
SelectS_BFEFromShifts(N);
if (N->getOperand(0).getOpcode() == ISD::SHL) {
SelectS_BFEFromShifts(N);
unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
assert(N->getOpcode() == ISD::BRCOND);
if (!N->hasOneUse())
MVT VT = Cond.getOperand(0).getSimpleValueType();
if (VT == MVT::i64) {
Subtarget->hasScalarCompareEq64();
if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
auto Cond = VCMP.getOperand(0);
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
if (Cond.isUndef()) {
  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                       N->getOperand(2), N->getOperand(0));
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
bool AndExec = !UseSCCBr;
bool Negate = false;
VCMP.getValueType().getSizeInBits() == Subtarget->getWavefrontSize()) {
bool NegatedBallot = false;
UseSCCBr = !BallotCond->isDivergent();
Negate = Negate ^ NegatedBallot;
UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
         : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
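// For divergent branches the condition lives in VCC and, unless it already
// came from a whole-wave compare, is first ANDed with EXEC (the
// S_AND_B32/S_AND_B64 above) so inactive lanes cannot affect
// S_CBRANCH_VCCNZ/VCCZ.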
void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
    !N->isDivergent()) {
if (Src.getValueType() == MVT::f16) {
CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
MachineMemOperand *MMO = M->getMemOperand();
if (CurDAG->isBaseWithConstantOffset(Ptr)) {
if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
N = glueCopyToM0(N, PtrBase);
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
N = glueCopyToM0(N, Ptr);
Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
  Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                 N->getOperand(5), N->getOperand(0)};
MachineMemOperand *MMO = M->getMemOperand();
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
case Intrinsic::amdgcn_ds_gws_init:
  return AMDGPU::DS_GWS_INIT;
case Intrinsic::amdgcn_ds_gws_barrier:
  return AMDGPU::DS_GWS_BARRIER;
case Intrinsic::amdgcn_ds_gws_sema_v:
  return AMDGPU::DS_GWS_SEMA_V;
case Intrinsic::amdgcn_ds_gws_sema_br:
  return AMDGPU::DS_GWS_SEMA_BR;
case Intrinsic::amdgcn_ds_gws_sema_p:
  return AMDGPU::DS_GWS_SEMA_P;
case Intrinsic::amdgcn_ds_gws_sema_release_all:
  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
if (!Subtarget->hasGWS() ||
    (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
     !Subtarget->hasGWSSemaReleaseAll())) {
const bool HasVSrc = N->getNumOperands() == 4;
assert(HasVSrc || N->getNumOperands() == 3);
SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
MachineMemOperand *MMO = M->getMemOperand();
glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
ImmOffset = ConstOffset->getZExtValue();
if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
= CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
= CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                         CurDAG->getTargetConstant(16, SL, MVT::i32));
glueCopyToM0(N, SDValue(M0Base, 0));
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
Ops.push_back(N->getOperand(2));
Ops.push_back(OffsetField);
Ops.push_back(Chain);
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
if (Subtarget->getLDSBankCount() != 16) {
SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
    CurDAG->getTargetConstant(2, DL, MVT::i32),
SDNode *InterpP1LV =
    CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
        CurDAG->getTargetConstant(0, DL, MVT::i32),
        CurDAG->getTargetConstant(0, DL, MVT::i32),
        CurDAG->getTargetConstant(0, DL, MVT::i1),
        CurDAG->getTargetConstant(0, DL, MVT::i32),
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(1);
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
  if (N->getValueType(0) != MVT::i32)
  SelectDSAppendConsume(N, IntrID);
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
  SelectDSBvhStackIntrinsic(N, IntrID);
case Intrinsic::amdgcn_init_whole_wave:
  CurDAG->getMachineFunction()
      .getInfo<SIMachineFunctionInfo>()
      ->setInitWholeWave();
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(0);
unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
SDNode *ConvGlueNode = N->getGluedNode();
CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
                       MVT::Glue, SDValue(ConvGlueNode, 0));
ConvGlueNode = nullptr;
case Intrinsic::amdgcn_wqm:
  Opcode = AMDGPU::WQM;
case Intrinsic::amdgcn_softwqm:
  Opcode = AMDGPU::SOFT_WQM;
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_strict_wwm:
  Opcode = AMDGPU::STRICT_WWM;
case Intrinsic::amdgcn_strict_wqm:
  Opcode = AMDGPU::STRICT_WQM;
case Intrinsic::amdgcn_interp_p1_f16:
  SelectInterpP1F16(N);
case Intrinsic::amdgcn_permlane16_swap:
case Intrinsic::amdgcn_permlane32_swap: {
  if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
       !Subtarget->hasPermlane16Swap()) ||
      (IntrID == Intrinsic::amdgcn_permlane32_swap &&
       !Subtarget->hasPermlane32Swap())) {
  Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
               ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
               : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
  NewOps.push_back(SDValue(ConvGlueNode, 0));
  bool FI = N->getConstantOperandVal(3);
  NewOps[2] = CurDAG->getTargetConstant(
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), NewOps);
if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
NewOps.push_back(SDValue(ConvGlueNode, 0));
CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(1);
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
case Intrinsic::amdgcn_ds_gws_sema_br:
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all:
  SelectDS_GWS(N, IntrID);
void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N),
                          MVT::i32);
CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
                     {N->getOperand(0), Log2WaveSize});
void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
if (N->isDivergent()) {
  SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                                         {SrcVal, Log2WaveSize}),
SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
if (Src.getOpcode() == ISD::FNEG) {
  Src = Src.getOperand(0);
} else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
if (LHS && LHS->isZero()) {
  Src = Src.getOperand(1);
if (AllowAbs && Src.getOpcode() == ISD::FABS) {
  Src = Src.getOperand(0);
if (IsCanonicalizing)
EVT VT = Src.getValueType();
(VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
auto ReplaceSrc = [&]() -> SDValue {
  return Src.getOperand(0);
Src.getValueType(), LHS, Index);
if (SelectVOP3ModsImpl(In, Src, Mods, true,
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
if (SelectVOP3ModsImpl(In, Src, Mods, false,
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
if (SelectVOP3ModsImpl(In, Src, Mods,
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
if (SelectVOP3ModsImpl(In, Src, Mods,
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
return SelectVINTERPModsImpl(In, Src, SrcMods, false);
bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
return SelectVINTERPModsImpl(In, Src, SrcMods, true);
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
return SelectVOP3Mods(In, Src, SrcMods);
bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
return SelectVOP3BMods(In, Src, SrcMods);
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
if (Src.getOpcode() == ISD::FNEG) {
  Src = Src.getOperand(0);
(!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
unsigned VecMods = Mods;
SDValue Lo = stripBitcast(Src.getOperand(0));
SDValue Hi = stripBitcast(Src.getOperand(1));
if (Lo.getOpcode() == ISD::FNEG) {
  Lo = stripBitcast(Lo.getOperand(0));
if (Hi.getOpcode() == ISD::FNEG) {
  Hi = stripBitcast(Hi.getOperand(0));
unsigned VecSize = Src.getValueSizeInBits();
Lo = stripExtractLoElt(Lo);
Hi = stripExtractLoElt(Hi);
if (Lo.getValueSizeInBits() > VecSize) {
  Lo = CurDAG->getTargetExtractSubreg(
      (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
if (Hi.getValueSizeInBits() > VecSize) {
  Hi = CurDAG->getTargetExtractSubreg(
      (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
assert(Lo.getValueSizeInBits() <= VecSize &&
       Hi.getValueSizeInBits() <= VecSize);
if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
if (VecSize == Lo.getValueSizeInBits()) {
} else if (VecSize == 32) {
  Src = createVOP3PSrc32FromLo16(Lo, Src, CurDAG, Subtarget);
assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
                       Lo.getValueType()), 0);
auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                            : AMDGPU::SReg_64RegClassID;
CurDAG->getTargetConstant(RC, SL, MVT::i32),
Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
                                     Src.getValueType(), Ops), 0);
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
.bitcastToAPInt().getZExtValue();
Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
Src.getNumOperands() == 2) {
ArrayRef<int> Mask = SVN->getMask();
if (Mask[0] < 2 && Mask[1] < 2) {
SDValue ShuffleSrc = SVN->getOperand(0);
if (ShuffleSrc.getOpcode() == ISD::FNEG) {
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
unsigned SrcVal = C->getZExtValue();
Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
unsigned DstRegClass;
switch (Elts.size()) {
  DstRegClass = AMDGPU::VReg_256RegClassID;
  DstRegClass = AMDGPU::VReg_128RegClassID;
  DstRegClass = AMDGPU::VReg_64RegClassID;
for (unsigned i = 0; i < Elts.size(); ++i) {
  Ops.push_back(Elts[i]);
assert("unhandled Reg sequence size" &&
       (Elts.size() == 8 || Elts.size() == 16));
for (unsigned i = 0; i < Elts.size(); i += 2) {
SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
{Elts[i + 1], Elts[i], PackLoLo});
const SDLoc &DL, unsigned ElementSize) {
if (ElementSize == 16)
if (ElementSize == 32)
unsigned ElementSize) {
if (ModOpcode == ISD::FNEG) {
for (auto El : Elts) {
  if (El.getOpcode() != ISD::FABS)
  NegAbsElts.push_back(El->getOperand(0));
if (Elts.size() != NegAbsElts.size()) {
assert(ModOpcode == ISD::FABS);
std::function<bool(SDValue)> ModifierCheck) {
for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
  SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
  if (!ModifierCheck(ElF16))
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
if (EltsF16.empty())
  ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
if (EltsV2F16.empty())
  ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
unsigned ModOpcode =
    (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
BitVector UndefElements;
if (isInlineImmediate(Splat.getNode())) {
unsigned Imm = C->getAPIntValue().getSExtValue();
Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
SDValue SplatSrc32 = stripBitcast(In);
if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
SDValue SplatSrc16 = stripBitcast(Splat32);
const SIInstrInfo *TII = Subtarget->getInstrInfo();
std::optional<APInt> RawValue;
RawValue = C->getValueAPF().bitcastToAPInt();
RawValue = C->getAPIntValue();
if (RawValue.has_value()) {
EVT VT = In.getValueType().getScalarType();
if (TII->isInlineConstant(FloatVal)) {
  Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
if (TII->isInlineConstant(RawValue.value())) {
  Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
const llvm::SDValue &ShiftSrc = In.getOperand(0);
IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
const llvm::SDValue &ShiftSrc = In.getOperand(0);
IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,
const SDValue &ExtendSrc = In.getOperand(0);
} else if (In->getOpcode() == ISD::BITCAST) {
const SDValue &CastSrc = In.getOperand(0);
if (Zero && Zero->getZExtValue() == 0)
Src = ExtractVecEltSrc;
IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
if (Op.getValueType() != MVT::f32 || Op.getOpcode() != ISD::BITCAST)
Op = Op.getOperand(0);
IsExtractHigh = false;
if (!Low16 || !Low16->isZero())
Op = stripBitcast(Op.getOperand(1));
if (Op.getValueType() != MVT::bf16)
if (Op.getValueType() != MVT::i32)
if (Mask->getZExtValue() == 0xffff0000) {
  IsExtractHigh = true;
  return Op.getOperand(0);
return Op.getOperand(0);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
SelectVOP3ModsImpl(In, Src, Mods);
bool IsExtractHigh = false;
if (Src.getOpcode() == ISD::FP_EXTEND) {
  Src = Src.getOperand(0);
} else if (VT == MVT::bf16) {
if (Src.getValueType() != VT &&
    (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
Src = stripBitcast(Src);
SelectVOP3ModsImpl(Src, Src, ModsTmp);
if (Src.getValueSizeInBits() == 16) {
Src.getOperand(0).getValueType() == MVT::i32) {
Src = Src.getOperand(0);
if (Subtarget->useRealTrue16Insts())
Src = createVOP3PSrc32FromLo16(Src, In, CurDAG, Subtarget);
} else if (IsExtractHigh)
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,
if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
unsigned NumOpcodes = 0;
const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
if (C->isAllOnes()) {
for (unsigned I = 0; I < Src.size(); ++I) {
if (Src.size() == 3) {
if (C->isAllOnes()) {
for (unsigned I = 0; I < Src.size(); ++I) {
  if (Src[I] == LHS) {
Bits = SrcBits[Src.size()];
switch (In.getOpcode()) {
if (!getOperandBits(LHS, LHSBits) ||
    !getOperandBits(RHS, RHSBits)) {
  return std::make_pair(0, 0);
NumOpcodes += Op.first;
LHSBits = Op.second;
NumOpcodes += Op.first;
RHSBits = Op.second;
return std::make_pair(0, 0);
switch (In.getOpcode()) {
TTbl = LHSBits & RHSBits;
TTbl = LHSBits | RHSBits;
TTbl = LHSBits ^ RHSBits;
return std::make_pair(NumOpcodes + 1, TTbl);
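// BitOp3_Op builds a 3-input truth table bottom-up: the leaf operands get
// the classic LUT projection constants 0xf0, 0xcc and 0xaa for the first,
// second and third input, and each AND/OR/XOR node combines its children's
// tables while counting how many logic ops the tree would replace.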
unsigned NumOpcodes;
std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
if (NumOpcodes < 2 || Src.empty())
if (NumOpcodes < 4 && !In->isDivergent())
if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
(In.getOperand(0).getOpcode() == In.getOpcode() ||
 In.getOperand(1).getOpcode() == In.getOpcode()))
while (Src.size() < 3)
  Src.push_back(Src[0]);
Tbl = CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
return CurDAG->getUNDEF(MVT::i32);
return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
return CurDAG->getConstant(
    C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
assert(CurDAG->getTarget().getTargetTriple().isAMDGCN());
const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
const SIInstrInfo *SII = Subtarget->getInstrInfo();
bool AllUsesAcceptSReg = true;
Limit < 10 && U != E; ++U, ++Limit) {
const TargetRegisterClass *RC =
    getOperandRegClass(U->getUser(), U->getOperandNo());
if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
  AllUsesAcceptSReg = false;
  SDNode *User = U->getUser();
  if (User->isMachineOpcode()) {
    unsigned Opc = User->getMachineOpcode();
    const MCInstrDesc &Desc = SII->get(Opc);
    if (Desc.isCommutable()) {
      unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
      unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
      const TargetRegisterClass *CommutedRC =
          getOperandRegClass(U->getUser(), CommutedOpNo);
      if (CommutedRC == &AMDGPU::VS_32RegClass ||
          CommutedRC == &AMDGPU::VS_64RegClass)
        AllUsesAcceptSReg = true;
if (!AllUsesAcceptSReg)
return !AllUsesAcceptSReg && (Limit < 10);
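// isVGPRImm moves a constant into a VGPR only when some use cannot take an
// SGPR operand; commutable users are checked for an alternative operand
// slot first, and the scan gives up (returning false) after ten uses.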
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
const MachineMemOperand *MMO = Ld->getMemOperand();
(Subtarget->getScalarizeGlobalBehavior() &&
->isMemOpHasNoClobberedMemOperand(N)));
bool IsModified = false;
while (Position != CurDAG->allnodes_end()) {
if (ResNode != Node) {
CurDAG->RemoveDeadNodes();
} while (IsModified);
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Undef
Value of the register doesn't matter.
@ User
could "use" a pointer
constexpr const char32_t SBase
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static SDNode * packConstantV2I16(const SDNode *N, SelectionDAG &DAG)
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Implement std::hash so that hash_code can be used in STL containers.
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.