#include "llvm/IR/IntrinsicsAMDGPU.h"
// ...
#ifdef EXPENSIVE_CHECKS
// ...
#endif

#define DEBUG_TYPE "amdgpu-isel"
// isExtractHiElt / stripExtractLoElt helpers (fragments):
  In = stripBitcast(In);
  // ...
  Out = In.getOperand(0);
  // ...
  if (ShiftAmt->getZExtValue() == 16) {
    // ...
  }
  // ...
    return In.getOperand(0);
  // ...
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
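// For context: stripBitcast, used throughout these helpers, is (in the
// upstream file) a one-line unwrapper for ISD::BITCAST nodes. A minimal
// sketch of the usual definition, shown here only for reference:
//
//   static SDValue stripBitcast(SDValue Val) {
//     return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
//   }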
102 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
107#ifdef EXPENSIVE_CHECKS
112 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder())
    assert(L->isLCSSAForm(DT));
#endif
#ifdef EXPENSIVE_CHECKS
// ...
#endif
// matchLoadD16FromBuildVector (fragments):
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;
  // ...
  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
  // ...
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
// PreprocessISelDAG (fragments):
  bool MadeChange = false;
  // ...
  switch (N->getOpcode()) {
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  // ...
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF());
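// Explanatory note (not part of the source): an "inline constant" is one of
// the VALU inline immediates (small integers, +/-0.5, +/-1.0, +/-2.0,
// +/-4.0, and 1/(2*pi) on subtargets with hasInv2PiInlineImm()); these
// encode directly into the instruction word and consume no literal slot.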
const TargetRegisterClass *
AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    // ...
    Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
    if (Reg.isVirtual()) {
      // ...
      return MRI.getRegClass(Reg);
    }

    const SIRegisterInfo *TRI =
        static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
    return TRI->getPhysRegBaseClass(Reg);
  }
  // ...
  switch (N->getMachineOpcode()) {
  default: {
    // ...
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.operands()[OpIdx].RegClass;
    // ...
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = N->getConstantOperandVal(0);
    // ...
    SDValue SubRegOp = N->getOperand(OpNo + 1);
// glueCopyToOp / glueCopyToM0 (fragments):
  SmallVector<SDValue, 8> Ops;
  // ...
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    // ...

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
  // ...
  return glueCopyToOp(N, M0, M0.getValue(1));
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  // ...
      AMDGPU::S_MOV_B32, DL, MVT::i32,
// SelectBuildVector (fragments):
  EVT VT = N->getValueType(0);
  // ...
  if (NumVectorElts == 1) {
    // ...
  }

  assert(NumVectorElts <= 32 &&
         "Vectors with more than 32 elements not supported yet");
  // ...
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // ...
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      // ...
    }
    // ...
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    // ...
  }

  if (NOps != NumVectorElts) {
    // ...
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      // ...
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          // ...
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    // ...
  }
  // ...
  N = glueCopyToM0LDSInit(N);
  // ...
  switch (Opc) {
  // ...
    if (N->getValueType(0) != MVT::i64)
      break;
    // ...
    SelectADD_SUB_I64(N);
    return;
  // ...
    if (N->getValueType(0) != MVT::i32)
      break;
    // ...
    SelectUADDO_USUBO(N);
    return;
  // ...
    SelectFMUL_W_CHAIN(N);
    return;
  // ...
    SelectFMA_W_CHAIN(N);
    return;
  // ...
    EVT VT = N->getValueType(0);
    // ...
    unsigned RegClassID =
        // ...
    if (N->getValueType(0) == MVT::i128) {
      // ...
    } else if (N->getValueType(0) == MVT::i64) {
      // ...
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    // ...
        N->getValueType(0), Ops));
  // ...
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;
    // ...
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    // ...
      Imm = C->getZExtValue();
  // ...
    return SelectMUL_LOHI(N);
  // ...
    if (N->getValueType(0) != MVT::i32)
      break;
  // ...
    if (N->getValueType(0) == MVT::i32) {
      // ...
          { N->getOperand(0), N->getOperand(1) });
    }
  // ...
    SelectINTRINSIC_W_CHAIN(N);
    return;
  // ...
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  // ...
    SelectINTRINSIC_VOID(N);
    return;
  // ...
    SelectWAVE_ADDRESS(N);
    return;
  // ...
    SelectSTACKRESTORE(N);
    return;
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  // ...
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}
bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  // ...
  const APInt &RHS = N->getConstantOperandAPInt(1);
  if (RHS.countr_one() >= ShAmtBits)
    return true;
  // ...
  return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
}
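// Explanatory note (not part of the source): hardware shifts read only the
// low log2(bit-width) bits of the shift amount, so an explicit
// (and amt, 31) before a 32-bit shift is a no-op and can be dropped. The
// KnownBits query extends this to masks that only clear bits already known
// to be zero in the LHS.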
// getBaseWithOffsetUsingSplitOR (fragments):
    N1 = Lo.getOperand(1);
  // ...
  assert(LHS && RHS && isa<ConstantSDNode>(RHS));
// ...

StringRef AMDGPUDAGToDAGISelLegacy::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
#ifdef EXPENSIVE_CHECKS
  // ...
  for (auto &L : LI.getLoopsInPreorder())
    assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
#endif
// Constant-address matching (fragments):
  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // ...
  } else if (/* ... */ &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    // ...
  } else if (/* ... */ &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Val, DL, MVT::i32));
  return SDValue(Mov, 0);
}
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  // ...
  unsigned Opcode = N->getOpcode();
  // ...
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);
  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);
  // ...
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
  // ...
      MVT::i64, RegSequenceArgs);
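// Explanatory note (not part of the source): OpcMap is indexed as
// [carry-in?][divergent?][is-add?]. Row 0 picks the low-half opcode
// (produces carry out), row 1 the high-half opcode (consumes carry in);
// divergent adds use the VALU carry forms (V_ADD_CO_U32/V_ADDC_U32) while
// uniform ones stay on the SALU (S_ADD_U32/S_ADDC_U32). For example,
// OpcMap[1][1][1] == V_ADDC_U32_e32: high half of a divergent 64-bit add.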
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  // ...
  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY
                       ? AMDGPU::V_ADDC_U32_e64
                       : AMDGPU::V_SUBB_U32_e64;
    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1)});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY
                       ? AMDGPU::S_ADD_CO_PSEUDO
                       : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // ...
  bool IsVALU = N->isDivergent();
  // ...
    if (UI.getUse().getResNo() == 1) {
      // ...
    }
  // ...
  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1)});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  // ...
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);
  // ...
  bool UseFMAC = /* ... */
                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  // ...
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  // ...
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc =
      (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64
                       : AMDGPU::V_DIV_SCALE_F32_e64;
  // ...
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  // ...
  if (Subtarget->hasMADIntraFwdBug())
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
  else
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  // ...
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
                   Clamp};
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
  // ...
  if (Subtarget->hasMADIntraFwdBug())
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
  else
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  // ...
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
  // ...
  SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::i32, SDValue(Mad, 0), Sub0);
  // ...
  SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::i32, SDValue(Mad, 0), Sub1);
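// Explanatory note (not part of the source): both MAD_64_32 and MUL_LOHI
// select to a single V_MAD_{I|U}64_{I|U}32; for mul_lohi the addend is tied
// to zero and the 64-bit result is split back into i32 halves with
// EXTRACT_SUBREG on sub0/sub1.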
// SelectDS1Addr1Offset (fragments):
    int64_t ByteOffset = C->getSExtValue();
    if (isDSOffsetLegal(SDValue(), ByteOffset)) {
      // ...
          Zero, Addr.getOperand(1));
      // ...
      if (isDSOffsetLegal(Sub, ByteOffset)) {
        // ...
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
        if (Subtarget->hasAddNoCarry())
          SubOp = AMDGPU::V_SUB_U32_e64;
        // ...
      }
    }
  // ...
  if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
    // ...
        DL, MVT::i32, Zero);
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;
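// Explanatory note (not part of the source): DS read2/write2 instructions
// encode two 8-bit offsets in units of the element size (4 or 8 bytes),
// which is exactly what the divisibility and isUInt<8> checks enforce.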
static bool isNoUnsignedWrap(SDValue Addr) {
  return (Addr.getOpcode() == ISD::ADD &&
          Addr->getFlags().hasNoUnsignedWrap()) ||
         Addr->getOpcode() == ISD::OR;
}
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
  // ...
  auto LHS = Addr.getOperand(0);
  auto RHS = Addr.getOperand(1);
  // ...
  if (Addr.getOpcode() == ISD::ADD &&
      (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
  // ...
  auto LHS = Addr.getOperand(0);
  auto RHS = Addr.getOperand(1);
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
  // ...
  auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
  // ...
      (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
    // ...
  auto LHS = Base.getOperand(0);
  auto RHS = Base.getOperand(1);
// SelectDS64Bit4ByteAligned / SelectDS128Bit8ByteAligned (fragments):
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
// ...
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  // ...
    unsigned OffsetValue1 = OffsetValue0 + Size;
    // ...
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      // ...
    }
  } else if (const ConstantSDNode *C =
                 dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
    unsigned OffsetValue0 = C->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      // ...
      if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
        // ...
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
        if (Subtarget->hasAddNoCarry())
          SubOp = AMDGPU::V_SUB_U32_e64;
        // ...
      }
    }
  } else if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
// SelectMUBUF (fragments):
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    // ...
    N0 = Addr.getOperand(0);
  // ...
      AMDGPU::S_MOV_B32, DL, MVT::i32,
  // ...

// SelectMUBUFAddr64 (fragments):
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;
  // ...
  if (C->getSExtValue()) {
std::pair<SDValue, SDValue>
AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  // ...
  auto *FI = dyn_cast<FrameIndexSDNode>(N);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 /* ... */) const {
  // ...
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    if (Imm != NullPtr) {
      // ...
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);
      // ...
    }
  // ...
    if (TII->isLegalMUBUFImmOffset(C1) &&
        // ...
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      // ...
  // ...
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
  // ...
  auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
  if (!Reg.isPhysical())
    return false;
  auto RC = TRI.getPhysRegBaseClass(Reg);
  return RC && TRI.isSGPRClass(RC);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  /* ... */) const {
  // ...
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    // ...
    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             // ...
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
                                          SDValue &SOffset) const {
  // ...
  SOffset = ByteOffsetNode;
  assert(isa<BuildVectorSDNode>(N));
// SelectFlatOffsetImpl (fragments):
  int64_t OffsetVal = 0;
  // ...
  bool CanHaveFlatSegmentOffsetBug =
      // ...
  if (isBaseWithConstantOffset64(Addr, N0, N1) &&
      // ...
       isFlatScratchBaseLegal(Addr))) {
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    // ...
    if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
      // ...
      OffsetVal = COffsetVal;
    } else {
      // ...
      std::tie(OffsetVal, RemainderOffset) =
          TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
      // ...
          getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
      // ...
      if (Addr.getValueType().getSizeInBits() == 32) {
        // ...
        unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
        if (Subtarget->hasAddNoCarry())
          AddOp = AMDGPU::V_ADD_U32_e64;
        // ...
      } else {
        // ...
        SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                              DL, MVT::i32, N0, Sub0);
        SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                              DL, MVT::i32, N0, Sub1);
        // ...
            getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
        // ...
            {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
        // ...
            AMDGPU::V_ADDC_U32_e64, DL, VTs,
        // ...
            MVT::i64, RegSequenceArgs),
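// Explanatory note (not part of the source): when a constant offset does
// not fit the FLAT immediate field, splitFlatOffset() returns the largest
// encodable immediate plus a remainder that is added into the base pointer
// up front; a 32-bit base takes a single V_ADD, while a 64-bit base uses a
// lo/hi add-with-carry pair re-assembled with REG_SEQUENCE.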
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
                              /* ... */);
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
                                           SDValue &SAddr, SDValue &VOffset,
                                           SDValue &Offset) const {
  int64_t ImmOffset = 0;
  // ...
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
    // ...
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        // ...
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
            // ...
        if (isUInt<32>(RemainderOffset)) {
          // ...
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
          // ...
        }
      }
    }

    // ...
    unsigned NumLiterals =
        !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
        !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
    // ...
  }

  // ...
  if (!LHS->isDivergent()) {
    // ...
  }

  if (!SAddr && !RHS->isDivergent()) {
    // ...
  }
  // ...
      isa<ConstantSDNode>(Addr))
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
    // ...
  } else if (SAddr.getOpcode() == ISD::ADD &&
             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
    // ...
    auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
    // ...
                                        FI->getValueType(0));
// SelectScratchSAddr (fragments):
  if (Addr->isDivergent())
    return false;
  // ...
  int64_t COffsetVal = 0;

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);
  }
  // ...
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        // ...
    COffsetVal = SplitImmOffset;
    // ...
        // ...
        ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
        : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
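// Explanatory note (not part of the source): the SAddr scratch form keeps a
// uniform base in scalar registers plus a signed immediate offset; an
// out-of-range remainder is materialized (S_MOV/V_MOV or a target constant)
// and folded back into the base.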
bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
    SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
  // ...
  return (VMax & 3) + (SMax & 3) >= 4;
}
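// Explanatory note (not part of the source): on subtargets with
// hasFlatScratchSVSSwizzleBug(), SVS scratch accesses miscompute the
// swizzled address when the low two bits of the scalar and vector address
// components carry out; the check conservatively compares the maximum
// possible values and rejects the pair if their low 2-bit fields could sum
// past 3.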
// SelectScratchSVAddr (fragments):
  int64_t ImmOffset = 0;
  // ...
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
    // ...
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {
      // ...
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset)
          // ...
      if (isUInt<32>(RemainderOffset)) {
        // ...
            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
        // ...
        if (!isFlatScratchBaseLegal(Addr))
          return false;
        if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
          return false;
        // ...
      }
    }
  }
  // ...
  if (!LHS->isDivergent() && RHS->isDivergent()) {
    // ...
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
    // ...
  }
  // ...
  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return false;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return false;
  }

  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
    return false;
bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     bool Imm32Only,
                                                     bool IsBuffer,
                                                     int64_t ImmOffset) const {
  if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
      // ...
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue *SOffset, SDValue *Offset,
                                          bool Imm32Only, bool IsBuffer,
                                          bool HasSOffset,
                                          int64_t ImmOffset) const {
  assert((!SOffset || !Offset) &&
         "Cannot match both soffset and offset at the same time!");
  // ...
    *SOffset = ByteOffsetNode;
    return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
                                       ImmOffset);
  // ...
    return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
                                       ImmOffset);
  // ...
  SDLoc SL(ByteOffsetNode);
  // ...
  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
  // ...
  std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
      *Subtarget, ByteOffset, IsBuffer, HasSOffset);
  if (EncodedOffset && Offset && !Imm32Only) {
    // ...
  }
  // ...
  if (EncodedOffset && Offset && Imm32Only) {
    // ...
  }

  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
    return false;
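// Explanatory note (not part of the source): SMRD/SMEM immediate offsets
// are encoded differently across generations (dword units vs. bytes,
// unsigned vs. signed ranges), so encodability is decided by
// AMDGPU::getSMRDEncodedOffset() rather than a simple range check.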
// Expand32BitAddress (fragments):
  if (Addr.getValueType() != MVT::i32)
    return Addr;
  // ...
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
                                              SDValue *SOffset,
                                              SDValue *Offset, bool Imm32Only,
                                              bool IsBuffer, bool HasSOffset,
                                              int64_t ImmOffset) const {
  if (SOffset && Offset) {
    assert(!Imm32Only && !IsBuffer);
    // ...
    if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
      return false;
    // ...
      ImmOff = C->getSExtValue();

    return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
                                ImmOff);
  }
  // ...
      !Addr->getFlags().hasNoUnsignedWrap())
    return false;
  // ...
    N0 = Addr.getOperand(0);
    N1 = Addr.getOperand(1);
  // ...
    assert(N0 && N1 && isa<ConstantSDNode>(N1));
  // ...
  if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset)) {
    SBase = N0;
    return true;
  }
  if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset)) {
    SBase = N1;
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue *SOffset, SDValue *Offset,
                                    bool Imm32Only) const {
  if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
    SBase = Expand32BitAddress(SBase);
    return true;
  }
  // ...
  if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
// SelectSMRDImm / SelectSMRDImm32 / SelectSMRDSgpr / SelectSMRDSgprImm /
// SelectSMRDBufferImm (fragments):
  return SelectSMRD(Addr, SBase, nullptr, &Offset);
// ...
  return SelectSMRD(Addr, SBase, nullptr, &Offset,
                    /* ... */);
// ...
  return SelectSMRD(Addr, SBase, &SOffset, nullptr);
// ...
  return SelectSMRD(Addr, SBase, &SOffset, &Offset);
// ...
  return SelectSMRDOffset(N, nullptr, &Offset,
                          /* ... */);
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
                                               SDValue &Offset) const {
  return SelectSMRDOffset(N, nullptr, &Offset,
                          /* ... */);
bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
                                                 SDValue &Offset) const {
  // ...
  return N.getValueType() == MVT::i32 &&
         SelectSMRDBaseOffset(N, SOffset, nullptr,
                              /* ... */);
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, SDValue &Base,
                                            SDValue &Offset) const {
  // ...
  if (isa<ConstantSDNode>(Index))
    return false;
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  if (Val->isDivergent()) {
    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    // ...
  }
  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
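// Explanatory note (not part of the source): the VALU form (V_BFE) takes
// offset and width as separate operands, while the scalar S_BFE packs both
// into a single source operand as (offset | (width << 16)), hence the two
// code paths above.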
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // ...
  const SDValue &Shl = N->getOperand(0);
  // ...
  if (0 < BVal && BVal <= CVal && CVal < 32) {
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // ...
      const SDValue &Srl = N->getOperand(0);
      // ...
      if (Shift && Mask) {
        // ...
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // ...
      if (Shift && Mask) {
        // ...
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
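// Explanatory note (not part of the source): SelectS_BFE folds the common
// shift+mask idioms -- (and (srl x, c), mask), (srl (and x, mask), c) and
// (sra/srl (shl x, b), c) -- into a single {S|V}_BFE bitfield extract whose
// offset and width are derived from the constants.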
// ISD::SIGN_EXTEND_INREG handling (fragments):
  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
  // ...
  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  // ...
  if (!N->hasOneUse())
    return false;
  // ...
  MVT VT = Cond.getOperand(0).getSimpleValueType();
  // ...
  if (VT == MVT::i64) {
    // ...

// combineBallotPattern (fragments):
  auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
  // ...
  auto Cond = VCMP.getOperand(0);
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  // ...
  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }
  // ...
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  bool AndExec = !UseSCCBr;
  bool Negate = false;
  // ...
    auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
    // ...
        VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
      // ...
      bool NegatedBallot = false;
      // ...
        UseSCCBr = !BallotCond->isDivergent();
        Negate = Negate ^ NegatedBallot;
      // ...
  unsigned BrOp =
      UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
               : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
  // ...
                             : AMDGPU::S_AND_B64,
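// Explanatory note (not part of the source): divergent branches keep the
// condition in VCC and AND it with EXEC (S_AND_B32/B64) so inactive lanes
// cannot steer the branch, then use S_CBRANCH_VCCZ/VCCNZ; uniform
// conditions compare into SCC and use S_CBRANCH_SCC0/SCC1 instead.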
void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
  if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
      !N->isDivergent()) {
    SDValue Src = N->getOperand(0);
    if (Src.getValueType() == MVT::f16) {
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  // ...
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  // ...
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);
      // ...
    }
  // ...
    N = glueCopyToM0(N, Ptr);
  // ...
    N->getOperand(N->getNumOperands() - 1)
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
  unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                   N->getOperand(5), N->getOperand(0)};
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
  default:
    llvm_unreachable("not a gws intrinsic");
  }
}
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (!Subtarget->hasGWS() ||
      (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
       !Subtarget->hasGWSSemaReleaseAll())) {
    // ...
  }
  // ...
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);
  // ...
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  // ...
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
    // ...
    ImmOffset = ConstOffset->getZExtValue();
    // ...
  }
  // ...
  glueCopyToM0(N, SDValue(M0Base, 0));
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    if (N->getValueType(0) != MVT::i32)
      break;
    SelectDSAppendConsume(N, IntrID);
    return;
  }
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    SelectDSBvhStackIntrinsic(N);
    return;
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(0);
  unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
  SDNode *ConvGlueNode = N->getGluedNode();
  if (ConvGlueNode) {
    // ...
        MVT::Glue, SDValue(ConvGlueNode, 0));
    // ...
  } else {
    ConvGlueNode = nullptr;
  }
  switch (IntrID) {
  case Intrinsic::amdgcn_wqm:
    Opcode = AMDGPU::WQM;
    break;
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
    break;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
    break;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
    break;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
    return;
  case Intrinsic::amdgcn_inverse_ballot:
    switch (N->getOperand(1).getValueSizeInBits()) {
    case 32:
      Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
      break;
    case 64:
      Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
      break;
    // ...
    }
    break;
  // ...
  }

  if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
    // ...
    NewOps.push_back(SDValue(ConvGlueNode, 0));
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);
    return;
void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
  // ...
  CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
                       {N->getOperand(0), Log2WaveSize});
}
void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
  // ...
  if (N->isDivergent()) {
    // ...
  }
  // ...
      {SrcVal, Log2WaveSize}),
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
  // ...
  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
    // ...
    auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
    if (LHS && LHS->isZero()) {
      Mods |= SISrcMods::NEG;
      Src = Src.getOperand(1);
    }
  }

  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }
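// Explanatory note (not part of the source): VALU source modifiers let
// fneg/fabs (and, when canonicalizing is allowed, an fsub from +/-0 acting
// as fneg) be folded into NEG/ABS operand bits instead of separate
// instructions; SelectVOP3ModsImpl peels those wrappers off Src and records
// them in Mods.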
// SelectVOP3Mods (fragment):
  if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/true,
      // ...

bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
    SDValue In, SDValue &Src, SDValue &SrcMods) const {
  if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/false,
      // ...
bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  // ...
  if (SelectVOP3ModsImpl(In, Src, Mods,
      // ...

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
                                               SDValue &SrcMods,
                                               bool OpSel) const {
  // ...
  if (SelectVOP3ModsImpl(In, Src, Mods,
      // ...

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
                                           SDValue &SrcMods) const {
  return SelectVINTERPModsImpl(In, Src, SrcMods, /*OpSel=*/false);
}

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  return SelectVINTERPModsImpl(In, Src, SrcMods, /*OpSel=*/true);
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  // ...
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods, SDValue &Clamp,
                                          SDValue &Omod) const {
  // ...
  return SelectVOP3BMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
  // ...
    Src = Src.getOperand(0);
  // ...
  unsigned VecMods = Mods;

  SDValue Lo = stripBitcast(Src.getOperand(0));
  SDValue Hi = stripBitcast(Src.getOperand(1));
  // ...
    Lo = stripBitcast(Lo.getOperand(0));
  // ...
    Hi = stripBitcast(Hi.getOperand(0));
  // ...
  unsigned VecSize = Src.getValueSizeInBits();
  Lo = stripExtractLoElt(Lo);
  Hi = stripExtractLoElt(Hi);

  if (Lo.getValueSizeInBits() > VecSize) {
    Lo = CurDAG->getTargetExtractSubreg(
        (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
        MVT::getIntegerVT(VecSize), Lo);
  }

  if (Hi.getValueSizeInBits() > VecSize) {
    Hi = CurDAG->getTargetExtractSubreg(
        (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
        MVT::getIntegerVT(VecSize), Hi);
  }

  assert(Lo.getValueSizeInBits() <= VecSize &&
         Hi.getValueSizeInBits() <= VecSize);

  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
    // ...
    if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
      // ...
    } else {
      assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
      // ...
          Lo.getValueType()), 0);
      auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                  : AMDGPU::SReg_64RegClassID;
      // ...
          Src.getValueType(), Ops), 0);
    }
    // ...
  }

  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
    uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                       .bitcastToAPInt().getZExtValue();
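// Explanatory note (not part of the source): packed (VOP3P) operands carry
// per-half op_sel/op_sel_hi and neg bits; the code above normalizes the
// lo/hi halves (stripping bitcasts and extract-lo wrappers) so a splatted
// half or a 64-bit literal can be rebuilt in the cheapest legal form
// (single register, REG_SEQUENCE, or inline literal).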
bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
                                            SDValue &SrcMods) const {
  return SelectVOP3PMods(In, Src, SrcMods, true);
}
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
  // ...
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  // ...
  unsigned SrcSign = C->getZExtValue();
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
                                                  SDValue &Src) const {
  // ...
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  // ...
  unsigned SrcVal = C->getZExtValue();
// buildRegSequence32 (fragments):
  unsigned DstRegClass;
  // ...
  switch (Elts.size()) {
  case 8:
    DstRegClass = AMDGPU::VReg_256RegClassID;
    // ...
  case 4:
    DstRegClass = AMDGPU::VReg_128RegClassID;
    // ...
  case 2:
    DstRegClass = AMDGPU::VReg_64RegClassID;
    // ...
  }
  // ...
  for (unsigned i = 0; i < Elts.size(); ++i) {
    // ...
  }

// buildRegSequence16 (fragments):
  assert("unhandled Reg sequence size" &&
         (Elts.size() == 8 || Elts.size() == 16));
  // ...
  for (unsigned i = 0; i < Elts.size(); i += 2) {
    SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
    // ...
        {Elts[i + 1], Elts[i], PackLoLo});
static MachineSDNode *buildRegSequence(SmallVectorImpl<SDValue> &Elts,
                                       llvm::SelectionDAG *CurDAG,
                                       const SDLoc &DL, unsigned ElementSize) {
  if (ElementSize == 16)
    return buildRegSequence16(Elts, CurDAG, DL);
  if (ElementSize == 32)
    return buildRegSequence32(Elts, CurDAG, DL);
  llvm_unreachable("Unhandled element size");
}

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<SDValue> &Elts, SDValue &Src,
                                 llvm::SelectionDAG *CurDAG, const SDLoc &DL,
                                 unsigned ElementSize) {
  // ...
  for (auto El : Elts) {
    // ...
    NegAbsElts.push_back(El->getOperand(0));
  }
  if (Elts.size() != NegAbsElts.size()) {
static void checkWMMAElementsModifiersF16(
    BuildVectorSDNode *BV, std::function<bool(SDValue)> ModifierCheck) {
  // ...
    if (auto *F16Pair =
            dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
      for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
        SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
        if (!ModifierCheck(ElF16))
          break;
      }
    }
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
                                              SDValue &SrcMods) const {
  // ...
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    // ...
  }
  // ...
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
                                                 SDValue &SrcMods) const {
  // ...
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    // ...
    if (EltsF16.empty())
      // ...
  }
  // ...
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    // ...
    if (EltsV2F16.empty())
      // ...
bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
                                                 SDValue &SrcMods) const {
  // ...
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    // ...
    unsigned ModOpcode =
        // ...
bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
  if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
    // ...
    if (isInlineImmediate(Splat.getNode())) {
      // ...
        unsigned Imm = C->getAPIntValue().getSExtValue();
      // ...
        unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
      // ...
    }
  }
  // ...
  SDValue SplatSrc32 = stripBitcast(In);
  if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
    if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
      SDValue SplatSrc16 = stripBitcast(Splat32);
      if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
        // ...
        std::optional<APInt> RawValue;
        if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat))
          RawValue = C->getValueAPF().bitcastToAPInt();
        else if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat))
          RawValue = C->getAPIntValue();

        if (RawValue.has_value()) {
          EVT VT = In.getValueType().getScalarType();
          // ...
          if (TII->isInlineConstant(FloatVal)) {
            // ...
          }
          // ...
          if (TII->isInlineConstant(RawValue.value())) {
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
                                            /* ... */) const {
  // ...
}

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
                                             /* ... */) const {
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  // ...
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  return SelectVOP3Mods(In, Src, SrcMods);
}
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  // ...
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);
    // ...
    unsigned ModsTmp;
    SelectVOP3ModsImpl(Src, Src, ModsTmp);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
                                                  SDValue &SrcMods) const {
  // ...
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
    return false;
  // ...
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  // ...
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  // ...
}

// getHi16Elt (fragment):
  return CurDAG->getConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  // ...
  bool AllUsesAcceptSReg = true;
  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
       Limit < 10 && U != E; ++U, ++Limit) {
    // ...
    if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
      AllUsesAcceptSReg = false;
      // ...
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        // ...
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
          // ...
          unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
          // ...
          if (CommutedRC == &AMDGPU::VS_32RegClass ||
              CommutedRC == &AMDGPU::VS_64RegClass)
            AllUsesAcceptSReg = true;
        }
      }
    }
    if (!AllUsesAcceptSReg)
      break;
  }

  return !AllUsesAcceptSReg && (Limit < 10);
}
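// Explanatory note (not part of the source): isVGPRImm scans at most 10
// uses; a constant is moved into a VGPR only when some use cannot accept an
// SGPR operand (VS_32/VS_64) even after commuting. Note the inverted
// result: if every scanned use accepts an SGPR, the function returns false
// and the immediate stays scalar.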
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  auto Ld = cast<LoadSDNode>(N);
  // ...
           ->isMemOpHasNoClobberedMemOperand(N)));
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  // ...
  bool IsModified = false;
  do {
    // ...
    SDNode *Node = &*Position++;
    // ...
    if (ResNode != Node) {
      // ...
    }
    // ...
  } while (IsModified);
}