29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#ifdef EXPENSIVE_CHECKS
37#define DEBUG_TYPE "amdgpu-isel"
52 In = stripBitcast(In);
58 Out = In.getOperand(0);
69 if (ShiftAmt->getZExtValue() == 16) {
85 return In.getOperand(0);
90 if (Src.getValueType().getSizeInBits() == 32)
91 return stripBitcast(Src);
100 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
105#ifdef EXPENSIVE_CHECKS
110 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
131bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
195#ifdef EXPENSIVE_CHECKS
196 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
197 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
199 assert(L->isLCSSAForm(DT));
208#ifdef EXPENSIVE_CHECKS
217 MVT VT = N->getValueType(0).getSimpleVT();
218 if (VT != MVT::v2i16 && VT != MVT::v2f16)
224 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
261 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
262 if (LdLo && Lo.hasOneUse()) {
301 bool MadeChange = false;
307 switch (N->getOpcode()) {
324bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
330 return TII->isInlineConstant(C->getAPIntValue());
333 return TII->isInlineConstant(C->getValueAPF());
343 unsigned OpNo) const {
344 if (!N->isMachineOpcode()) {
346 Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
347 if (Reg.isVirtual()) {
349 return MRI.getRegClass(Reg);
353 return TRI->getPhysRegBaseClass(Reg);
359 switch (N->getMachineOpcode()) {
363 unsigned OpIdx = Desc.getNumDefs() + OpNo;
364 if (OpIdx >= Desc.getNumOperands())
366 int RegClass = Desc.operands()[OpIdx].RegClass;
372 case AMDGPU::REG_SEQUENCE: {
373 unsigned RCID = N->getConstantOperandVal(0);
377 SDValue SubRegOp = N->getOperand(OpNo + 1);
387 SmallVector <SDValue, 8> Ops;
389 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
400 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
403 return glueCopyToOp(N, M0, M0.getValue(1));
406SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
407 unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
424 AMDGPU::S_MOV_B32, DL, MVT::i32,
427 AMDGPU::S_MOV_B32, DL, MVT::i32,
438 EVT VT = N->getValueType(0);
444 if (NumVectorElts == 1) {
450 assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
460 bool IsRegSeq = true;
461 unsigned NOps = N->getNumOperands();
462 for (unsigned i = 0; i < NOps; i++) {
464 if (isa<RegisterSDNode>(N->getOperand(i))) {
470 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
473 if (NOps != NumVectorElts) {
478 for (unsigned i = NOps; i < NumVectorElts; ++i) {
481 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
482 RegSeqArgs[1 + (2 * i) + 1] =
493 unsigned int Opc = N->getOpcode();
494 if (N->isMachineOpcode()) {
502 N = glueCopyToM0LDSInit(N);
517 if (N->getValueType(0) != MVT::i64)
520 SelectADD_SUB_I64(N);
525 if (N->getValueType(0) != MVT::i32)
532 SelectUADDO_USUBO(N);
536 SelectFMUL_W_CHAIN(N);
540 SelectFMA_W_CHAIN(N);
546 EVT VT = N->getValueType(0);
560 unsigned RegClassID =
568 if (N->getValueType(0) == MVT::i128) {
572 } else if (N->getValueType(0) == MVT::i64) {
579 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
580 N->getOperand(1), SubReg1 };
582 N->getValueType(0), Ops));
588 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
593 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
598 Imm = C->getZExtValue();
645 return SelectMUL_LOHI(N);
656 if (N->getValueType(0) != MVT::i32)
673 if (N->getValueType(0) == MVT::i32) {
676 { N->getOperand(0), N->getOperand(1) });
684 SelectINTRINSIC_W_CHAIN(N);
688 SelectINTRINSIC_WO_CHAIN(N);
692 SelectINTRINSIC_VOID(N);
696 SelectWAVE_ADDRESS(N);
700 SelectSTACKRESTORE(N);
708bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
711 return Term->getMetadata("amdgpu.uniform") ||
712 Term->getMetadata("structurizecfg.uniform");
715bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
716 unsigned ShAmtBits) const {
719 const APInt &RHS = N->getConstantOperandAPInt(1);
720 if (RHS.countr_one() >= ShAmtBits)
724 return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
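An editorial gloss on the check above (the constants below are illustrative, not taken from this file): hardware shifts of a 32-bit value consume only the low five bits of the shift amount, so an AND whose mask ends in at least ShAmtBits ones changes nothing and can be skipped during selection.
// Sketch, assuming ShAmtBits == 5 for a 32-bit shift:
//   x << (amt & 31)   -- 31 = 0b11111 has countr_one() == 5 >= ShAmtBits,
//                        so the AND on the shift amount is unneeded;
//   known-zero bits of the LHS may likewise complete the run of trailing ones.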
750 N1 = Lo.getOperand(1);
767 assert(LHS && RHS && isa<ConstantSDNode>(RHS));
775 return "AMDGPU DAG->DAG Pattern Instruction Selection";
785#ifdef EXPENSIVE_CHECKS
791 for (auto &L : LI.getLoopsInPreorder())
792 assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
811 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
815 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
819 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
830SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
833 AMDGPU::S_MOV_B32, DL, MVT::i32,
839void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
844 unsigned Opcode = N->getOpcode();
854 DL, MVT::i32, LHS, Sub0);
856 DL, MVT::i32, LHS, Sub1);
859 DL, MVT::i32, RHS, Sub0);
861 DL, MVT::i32, RHS, Sub1);
865 static const unsigned OpcMap[2][2][2] = {
866 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
867 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
868 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
869 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
871 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
872 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
897 MVT::i64, RegSequenceArgs);
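A minimal sketch of the decomposition this routine performs, assuming the divergent (VALU) case; the pseudo-assembly is illustrative, not the literal node list that is built:
//   lo, carry = V_ADD_CO_U32  lo32(LHS), lo32(RHS)
//   hi        = V_ADDC_U32    hi32(LHS), hi32(RHS), carry
//   result    = REG_SEQUENCE  lo, sub0, hi, sub1        // 64-bit value
// The uniform case uses the scalar S_ADD_U32 / S_ADDC_U32 pair from OpcMap instead.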
908void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
914 if (N->isDivergent()) {
916 : AMDGPU::V_SUBB_U32_e64;
918 N, Opc, N->getVTList(),
920 CurDAG->getTargetConstant(0, {}, MVT::i1) });
923 : AMDGPU::S_SUB_CO_PSEUDO;
924 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
928void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
933 bool IsVALU = N->isDivergent();
937 if (UI.getUse().getResNo() == 1) {
946 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
949 N, Opc, N->getVTList(),
950 {N->getOperand(0), N->getOperand(1),
951 CurDAG->getTargetConstant(0, {}, MVT::i1) });
953 unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
954 : AMDGPU::S_USUBO_PSEUDO;
956 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
957 {N->getOperand(0), N->getOperand(1)});
961void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
966 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
967 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
968 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
969 Ops[8] = N->getOperand(0);
970 Ops[9] = N->getOperand(4);
975 cast<ConstantSDNode>(Ops[0])->isZero() &&
976 cast<ConstantSDNode>(Ops[2])->isZero() &&
977 cast<ConstantSDNode>(Ops[4])->isZero();
978 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
982void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
987 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
988 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
989 Ops[6] = N->getOperand(0);
990 Ops[7] = N->getOperand(3);
997void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
999 EVT VT = N->getValueType(0);
1001 assert(VT == MVT::f32 || VT == MVT::f64);
1004 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1009 SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1010 SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1011 SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
1017void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1022 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1023 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1025 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1028 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
1035void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1040 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1041 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1043 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1047 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1052 MVT::i32, SDValue(Mad, 0), Sub0);
1058 MVT::i32, SDValue(Mad, 0), Sub1);
1093 int64_t ByteOffset = C->getSExtValue();
1094 if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1101 Zero, Addr.getOperand(1));
1103 if (isDSOffsetLegal(Sub, ByteOffset)) {
1109 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1111 SubOp = AMDGPU::V_SUB_U32_e64;
1133 if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
1136 DL, MVT::i32, Zero);
1149bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1151 unsigned Size) const {
1152 if (Offset0 % Size != 0 || Offset1 % Size != 0)
1154 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
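A worked example of why both conditions above are needed (the numbers are illustrative only): the two offsets of a DS read2/write2 are encoded as 8-bit fields in units of the element size.
//   Size == 8: Offset0 = 0, Offset1 = 2040  -> fields (0, 255)             : legal
//   Size == 8: Offset0 = 4, Offset1 = 2048  -> 4 % 8 != 0, 2048 / 8 = 256  : rejected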
1169 Addr->getFlags().hasNoUnsignedWrap()) ||
1176bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
1185 auto LHS = Addr.getOperand(0);
1186 auto RHS = Addr.getOperand(1);
1193 if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
1203bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
1212 auto LHS = Addr.getOperand(0);
1213 auto RHS = Addr.getOperand(1);
1219bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
1226 auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
1233 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1236 auto LHS = Base.getOperand(0);
1237 auto RHS = Base.getOperand(1);
1245 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1251 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1256 unsigned Size) const {
1264 unsigned OffsetValue1 = OffsetValue0 + Size;
1267 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
1276 dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1277 unsigned OffsetValue0 = C->getZExtValue();
1278 unsigned OffsetValue1 = OffsetValue0 + Size;
1280 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1290 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1294 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1296 SubOp = AMDGPU::V_SUB_U32_e64;
1314 unsigned OffsetValue0 = CAddr->getZExtValue();
1315 unsigned OffsetValue1 = OffsetValue0 + Size;
1317 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1357 C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1359 N0 = Addr.getOperand(0);
1418 AMDGPU::S_MOV_B32, DL, MVT::i32,
1434 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1438 if (C->getSExtValue()) {
1451std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1454 auto *FI = dyn_cast<FrameIndexSDNode>(N);
1465bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1477 int64_t Imm = CAddr->getSExtValue();
1478 const int64_t NullPtr =
1481 if (Imm != NullPtr) {
1486 AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1487 VAddr = SDValue(MovHighBits, 0);
1517 if (TII->isLegalMUBUFImmOffset(C1) &&
1520 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1527 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1535 auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1536 if (!Reg.isPhysical())
1538 const auto *RC = TRI.getPhysRegBaseClass(Reg);
1539 return RC && TRI.isSGPRClass(RC);
1542bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1565 CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1571 SOffset = Addr.getOperand(0);
1572 } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1592 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1595 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1596 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1597 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1599 maskTrailingOnes<uint64_t>(32);
1611bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
1618 SOffset = ByteOffsetNode;
1628 assert(isa<BuildVectorSDNode>(N));
1639 int64_t OffsetVal = 0;
1643 bool CanHaveFlatSegmentOffsetBug =
1650 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1652 isFlatScratchBaseLegal(Addr))) {
1653 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1656 if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1658 OffsetVal = COffsetVal;
1673 std::tie(OffsetVal, RemainderOffset) =
1674 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1677 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1680 if (Addr.getValueType().getSizeInBits() == 32) {
1684 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1686 AddOp = AMDGPU::V_ADD_U32_e64;
1697 DL, MVT::i32, N0, Sub0);
1699 DL, MVT::i32, N0, Sub1);
1702 getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1708 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1711 AMDGPU::V_ADDC_U32_e64, DL, VTs,
1719 MVT::i64, RegSequenceArgs),
1746 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1760bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1765 int64_t ImmOffset = 0;
1771 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1772 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1778 ImmOffset = COffsetVal;
1779 } else if (!LHS->isDivergent()) {
1780 if (COffsetVal > 0) {
1785 int64_t SplitImmOffset, RemainderOffset;
1786 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1789 if (isUInt<32>(RemainderOffset)) {
1791 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1805 unsigned NumLiterals =
1818 if (!LHS->isDivergent()) {
1826 if (!SAddr && !RHS->isDivergent()) {
1841 isa<ConstantSDNode>(Addr))
1856 if (auto *FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1859 isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1862 auto *FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1864 FI->getValueType(0));
1877 if (Addr->isDivergent())
1882 int64_t COffsetVal = 0;
1885 COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1886 SAddr = Addr.getOperand(0);
1897 int64_t SplitImmOffset, RemainderOffset;
1898 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1901 COffsetVal = SplitImmOffset;
1905 ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1906 : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);
1918bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1933 return (VMax & 3) + (SMax & 3) >= 4;
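Reading the return value above (an editorial gloss, not from the source): VMax and SMax are taken to be the known-bits maxima of the vector and scalar address parts, so the expression conservatively asks whether their low two bits could sum past 3 and carry into bit 2, the case in which the swizzle bug can trigger.
//   e.g. (VMax & 3) == 2 and (SMax & 3) == 1  ->  3 < 4,  no carry possible, SVS form allowed
//        (VMax & 3) == 3 and (SMax & 3) == 1  ->  4 >= 4, a carry may occur, bail out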
1939 int64_t ImmOffset = 0;
1943 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1944 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1949 ImmOffset = COffsetVal;
1950 } else if (!LHS->isDivergent() && COffsetVal > 0) {
1954 int64_t SplitImmOffset, RemainderOffset;
1955 std::tie(SplitImmOffset, RemainderOffset)
1958 if (isUInt<32>(RemainderOffset)) {
1960 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1964 if (!isFlatScratchBaseLegal(Addr))
1966 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1980 if (!LHS->isDivergent() && RHS->isDivergent()) {
1983 } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1990 if (OrigAddr != Addr) {
1991 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
1994 if (!isFlatScratchBaseLegalSV(OrigAddr))
1998 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2008bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
2011 int64_t ImmOffset) const {
2012 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2025bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
2027 bool Imm32Only, bool IsBuffer,
2029 int64_t ImmOffset) const {
2031 "Cannot match both soffset and offset at the same time!");
2040 *SOffset = ByteOffsetNode;
2041 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2047 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2054 SDLoc SL(ByteOffsetNode);
2058 int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
2060 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2061 if (EncodedOffset && Offset && !Imm32Only) {
2071 if (EncodedOffset && Offset && Imm32Only) {
2076 if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
2090 if (Addr.getValueType() != MVT::i32)
2098 unsigned AddrHiVal = Info->get32BitAddressHighBits();
2119 bool Imm32Only, bool IsBuffer,
2121 int64_t ImmOffset) const {
2123 assert(!Imm32Only && !IsBuffer);
2126 if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
2131 ImmOff = C->getSExtValue();
2133 return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
2140 !Addr->getFlags().hasNoUnsignedWrap())
2146 N0 = Addr.getOperand(0);
2147 N1 = Addr.getOperand(1);
2149 assert(N0 && N1 && isa<ConstantSDNode>(N1));
2154 if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2159 if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2169 bool Imm32Only) const {
2170 if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2171 SBase = Expand32BitAddress(SBase);
2175 if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
2186 return SelectSMRD(Addr, SBase, nullptr, &Offset);
2192 return SelectSMRD(Addr, SBase, nullptr, &Offset,
2198 return SelectSMRD(Addr, SBase, &SOffset, nullptr);
2204 return SelectSMRD(Addr, SBase, &SOffset, &Offset);
2208 return SelectSMRDOffset(N, nullptr, &Offset,
2212bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
2215 return SelectSMRDOffset(N, nullptr, &Offset,
2219bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2223 return N.getValueType() == MVT::i32 &&
2224 SelectSMRDBaseOffset(N, SOffset, nullptr,
2229bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2251 if (isa<ConstantSDNode>(Index))
2259SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2263 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2269 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2279void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2284 const SDValue &Shl = N->getOperand(0);
2292 if (0 < BVal && BVal <= CVal && CVal < 32) {
2302void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2303 switch (N->getOpcode()) {
2305 if (N->getOperand(0).getOpcode() == ISD::SRL) {
2308 const SDValue &Srl = N->getOperand(0);
2312 if (Shift && Mask) {
2326 if (N->getOperand(0).getOpcode() == ISD::AND) {
2333 if (Shift && Mask) {
2344 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2345 SelectS_BFEFromShifts(N);
2350 if (N->getOperand(0).getOpcode() == ISD::SHL) {
2351 SelectS_BFEFromShifts(N);
2362 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2366 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2376bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2378 if (!N->hasOneUse())
2388 MVT VT = Cond.getOperand(0).getSimpleValueType();
2392 if (VT == MVT::i64) {
2415 auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
2419 auto Cond = VCMP.getOperand(0);
2431void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2434 if (Cond.isUndef()) {
2436 N->getOperand(2), N->getOperand(0));
2442 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2443 bool AndExec = !UseSCCBr;
2444 bool Negate = false;
2449 auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
2462 bool NegatedBallot = false;
2465 UseSCCBr = !BallotCond->isDivergent();
2466 Negate = Negate ^ NegatedBallot;
2481 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2482 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2483 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2502 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2517void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
2519 !N->isDivergent()) {
2521 if (Src.getValueType() == MVT::f16) {
2533void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2536 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2537 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2551 if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2552 N = glueCopyToM0(N, PtrBase);
2558 N = glueCopyToM0(N, Ptr);
2566 N->getOperand(N->getNumOperands() - 1)
2575void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2576 unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2577 SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2578 N->getOperand(5), N->getOperand(0)};
2588 case Intrinsic::amdgcn_ds_gws_init:
2589 return AMDGPU::DS_GWS_INIT;
2590 case Intrinsic::amdgcn_ds_gws_barrier:
2591 return AMDGPU::DS_GWS_BARRIER;
2592 case Intrinsic::amdgcn_ds_gws_sema_v:
2593 return AMDGPU::DS_GWS_SEMA_V;
2594 case Intrinsic::amdgcn_ds_gws_sema_br:
2595 return AMDGPU::DS_GWS_SEMA_BR;
2596 case Intrinsic::amdgcn_ds_gws_sema_p:
2597 return AMDGPU::DS_GWS_SEMA_P;
2598 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2599 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2605void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2606 if (!Subtarget->hasGWS() ||
2607 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2615 const bool HasVSrc = N->getNumOperands() == 4;
2616 assert(HasVSrc || N->getNumOperands() == 3);
2619 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2630 if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2636 ImmOffset = ConstOffset->getZExtValue();
2654 glueCopyToM0(N, SDValue(M0Base, 0));
2671void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2729void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2730 unsigned IntrID = N->getConstantOperandVal(1);
2732 case Intrinsic::amdgcn_ds_append:
2733 case Intrinsic::amdgcn_ds_consume: {
2734 if (N->getValueType(0) != MVT::i32)
2736 SelectDSAppendConsume(N, IntrID);
2739 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2740 SelectDSBvhStackIntrinsic(N);
2742 case Intrinsic::amdgcn_init_whole_wave:
2745 ->setInitWholeWave();
2752void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2753 unsigned IntrID = N->getConstantOperandVal(0);
2754 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
2755 SDNode *ConvGlueNode = N->getGluedNode();
2762 MVT::Glue, SDValue(ConvGlueNode, 0));
2764 ConvGlueNode = nullptr;
2767 case Intrinsic::amdgcn_wqm:
2768 Opcode = AMDGPU::WQM;
2770 case Intrinsic::amdgcn_softwqm:
2771 Opcode = AMDGPU::SOFT_WQM;
2773 case Intrinsic::amdgcn_wwm:
2774 case Intrinsic::amdgcn_strict_wwm:
2775 Opcode = AMDGPU::STRICT_WWM;
2777 case Intrinsic::amdgcn_strict_wqm:
2778 Opcode = AMDGPU::STRICT_WQM;
2780 case Intrinsic::amdgcn_interp_p1_f16:
2781 SelectInterpP1F16(N);
2783 case Intrinsic::amdgcn_permlane16_swap:
2784 case Intrinsic::amdgcn_permlane32_swap: {
2785 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
2787 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
2793 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
2794 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
2795 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
2799 NewOps.push_back(SDValue(ConvGlueNode, 0));
2801 bool FI = N->getConstantOperandVal(3);
2813 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
2820 NewOps.push_back(SDValue(ConvGlueNode, 0));
2825void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2826 unsigned IntrID = N->getConstantOperandVal(1);
2828 case Intrinsic::amdgcn_ds_gws_init:
2829 case Intrinsic::amdgcn_ds_gws_barrier:
2830 case Intrinsic::amdgcn_ds_gws_sema_v:
2831 case Intrinsic::amdgcn_ds_gws_sema_br:
2832 case Intrinsic::amdgcn_ds_gws_sema_p:
2833 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2834 SelectDS_GWS(N, IntrID);
2843void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
2847 {N->getOperand(0), Log2WaveSize});
2850void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
2867 if (N->isDivergent()) {
2874 {SrcVal, Log2WaveSize}),
2882bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2884 bool IsCanonicalizing,
2885 bool AllowAbs) const {
2891 Src = Src.getOperand(0);
2892 } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
2895 auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2896 if (LHS && LHS->isZero()) {
2898 Src = Src.getOperand(1);
2902 if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2904 Src = Src.getOperand(0);
2913 if (SelectVOP3ModsImpl(In, Src, Mods, true,
2922bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
2925 if (SelectVOP3ModsImpl(In, Src, Mods, false,
2934bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2937 if (SelectVOP3ModsImpl(In, Src, Mods,
2947bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2955bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
2959 if (SelectVOP3ModsImpl(In, Src, Mods,
2971bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
2973 return SelectVINTERPModsImpl(In, Src, SrcMods, false);
2976bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
2978 return SelectVINTERPModsImpl(In, Src, SrcMods, true);
2981bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2988 return SelectVOP3Mods(In, Src, SrcMods);
2991bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2998 return SelectVOP3BMods(In, Src, SrcMods);
3001bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
3012bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
3013 SDValue &SrcMods, bool IsDOT) const {
3020 Src = Src.getOperand(0);
3025 unsigned VecMods = Mods;
3027 SDValue Lo = stripBitcast(Src.getOperand(0));
3028 SDValue Hi = stripBitcast(Src.getOperand(1));
3031 Lo = stripBitcast(Lo.getOperand(0));
3036 Hi = stripBitcast(Hi.getOperand(0));
3046 unsigned VecSize = Src.getValueSizeInBits();
3047 Lo = stripExtractLoElt(Lo);
3048 Hi = stripExtractLoElt(Hi);
3050 if (Lo.getValueSizeInBits() > VecSize) {
3052 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3056 if (Hi.getValueSizeInBits() > VecSize) {
3058 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3062 assert(Lo.getValueSizeInBits() <= VecSize &&
3063 Hi.getValueSizeInBits() <= VecSize);
3065 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
3069 if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
3072 assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
3077 Lo.getValueType()), 0);
3078 auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3079 : AMDGPU::SReg_64RegClassID;
3086 Src.getValueType(), Ops), 0);
3092 if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
3093 uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
3094 .bitcastToAPInt().getZExtValue();
3112bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
3114 return SelectVOP3PMods(In, Src, SrcMods, true);
3117bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
3121 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3124 unsigned SrcSign = C->getZExtValue();
3132bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
3135 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3138 unsigned SrcVal = C->getZExtValue();
3149 unsigned DstRegClass;
3151 switch (Elts.size()) {
3153 DstRegClass = AMDGPU::VReg_256RegClassID;
3157 DstRegClass = AMDGPU::VReg_128RegClassID;
3161 DstRegClass = AMDGPU::VReg_64RegClassID;
3170 for (unsigned i = 0; i < Elts.size(); ++i) {
3182 assert("unhandled Reg sequence size" &&
3183 (Elts.size() == 8 || Elts.size() == 16));
3187 for (unsigned i = 0; i < Elts.size(); i += 2) {
3188 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3196 {Elts[i + 1], Elts[i], PackLoLo});
3206 const SDLoc &DL, unsigned ElementSize) {
3207 if (ElementSize == 16)
3209 if (ElementSize == 32)
3217 unsigned ElementSize) {
3222 for (auto El : Elts) {
3225 NegAbsElts.push_back(El->getOperand(0));
3227 if (Elts.size() != NegAbsElts.size()) {
3247 std::function<bool(SDValue)> ModifierCheck) {
3250 dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
3251 for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3252 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3253 if (!ModifierCheck(ElF16))
3260bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
3266 if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3285 if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3307bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
3314 if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3318 if (EltsF16.empty())
3333 if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3339 if (EltsV2F16.empty())
3356bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
3362 if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3366 unsigned ModOpcode =
3385bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
3386 if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
3389 if (isInlineImmediate(Splat.getNode())) {
3391 unsigned Imm = C->getAPIntValue().getSExtValue();
3396 unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
3405 SDValue SplatSrc32 = stripBitcast(In);
3406 if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
3407 if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3408 SDValue SplatSrc16 = stripBitcast(Splat32);
3409 if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
3412 std::optional<APInt> RawValue;
3414 RawValue = C->getValueAPF().bitcastToAPInt();
3416 RawValue = C->getAPIntValue();
3418 if (RawValue.has_value()) {
3419 EVT VT = In.getValueType().getScalarType();
3425 if (TII->isInlineConstant(FloatVal)) {
3431 if (TII->isInlineConstant(RawValue.value())) {
3445bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
3464bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
3483bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
3491bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
3494 return SelectVOP3Mods(In, Src, SrcMods);
3499bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
3500 unsigned &Mods) const {
3502 SelectVOP3ModsImpl(In, Src, Mods);
3505 Src = Src.getOperand(0);
3506 assert(Src.getValueType() == MVT::f16);
3507 Src = stripBitcast(Src);
3513 SelectVOP3ModsImpl(Src, Src, ModsTmp);
3540bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
3543 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
3549bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
3552 SelectVOP3PMadMixModsImpl(In, Src, Mods);
3561 unsigned NumOpcodes = 0;
3574 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
3576 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3577 if (C->isAllOnes()) {
3587 for (unsigned I = 0; I < Src.size(); ++I) {
3601 if (Src.size() == 3) {
3606 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3607 if (C->isAllOnes()) {
3609 for (unsigned I = 0; I < Src.size(); ++I) {
3610 if (Src[I] == LHS) {
3622 Bits = SrcBits[Src.size()];
3627 switch (In.getOpcode()) {
3635 if (!getOperandBits(LHS, LHSBits) ||
3636 !getOperandBits(RHS, RHSBits)) {
3638 return std::make_pair(0, 0);
3644 NumOpcodes += Op.first;
3645 LHSBits = Op.second;
3650 NumOpcodes += Op.first;
3651 RHSBits = Op.second;
3656 return std::make_pair(0, 0);
3660 switch (In.getOpcode()) {
3662 TTbl = LHSBits & RHSBits;
3665 TTbl = LHSBits | RHSBits;
3668 TTbl = LHSBits ^ RHSBits;
3674 return std::make_pair(NumOpcodes + 1, TTbl);
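A hedged sketch of the truth-table bookkeeping used here: each leaf source is assigned a canonical 8-bit table (0xf0, 0xcc, 0xaa for sources 0, 1, 2), and combining two sub-expressions with AND/OR/XOR simply combines their tables bitwise, so TTbl ends up as the lookup table a single three-input bit-operation instruction would need.
//   src0 -> 0xf0, src1 -> 0xcc, src2 -> 0xaa
//   src0 & src1            -> 0xf0 & 0xcc = 0xc0
//   (src0 & src1) ^ src2   -> 0xc0 ^ 0xaa = 0x6a   // TTbl for the whole expression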
3681 unsigned NumOpcodes;
3683 std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
3687 if (NumOpcodes < 2 || Src.empty())
3693 if (NumOpcodes < 4 && !In->isDivergent())
3696 if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
3701 (In.getOperand(0).getOpcode() == In.getOpcode() ||
3702 In.getOperand(1).getOpcode() == In.getOpcode()))
3716 while (Src.size() < 3)
3717 Src.push_back(Src[0]);
3739 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
3749bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
3758 bool AllUsesAcceptSReg = true;
3760 Limit < 10 && U != E; ++U, ++Limit) {
3762 getOperandRegClass(U->getUser(), U->getOperandNo());
3770 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
3771 AllUsesAcceptSReg = false;
3773 if (User->isMachineOpcode()) {
3774 unsigned Opc = User->getMachineOpcode();
3776 if (Desc.isCommutable()) {
3777 unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
3780 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
3782 getOperandRegClass(U->getUser(), CommutedOpNo);
3783 if (CommutedRC == &AMDGPU::VS_32RegClass ||
3784 CommutedRC == &AMDGPU::VS_64RegClass)
3785 AllUsesAcceptSReg = true;
3793 if (!AllUsesAcceptSReg)
3797 return !AllUsesAcceptSReg && (Limit < 10);
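A brief reading of the return value above (editorial comment, not from the source):
//   Limit < 10 means the loop visited every use before giving up, and
//   !AllUsesAcceptSReg means at least one visited use cannot take an SGPR operand,
//   so only then is the immediate worth materializing in a VGPR.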
3800bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
3801 const auto *Ld = cast<LoadSDNode>(N);
3817 ->isMemOpHasNoClobberedMemOperand(N)));
3823 bool IsModified = false;
3830 SDNode *Node = &*Position++;
3836 if (ResNode != Node) {
3843 } while (IsModified);
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static MachineSDNode * buildRegSequence32(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static SDValue matchZExtFromI32(SDValue Op)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static MemSDNode * findMemSDNode(SDNode *N)
static MachineSDNode * buildRegSequence16(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
static SDValue stripBitcast(SDValue Val)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
bool hasPermlane32Swap() const
bool hasScalarCompareEq64() const
int getLDSBankCount() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool unsafeDSOffsetFoldingEnabled() const
bool hasFlatInstOffsets() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool privateMemoryResourceIsRangeChecked() const
bool hasSignedScratchOffsets() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasDOTOpSelHazard() const
bool d16PreservesUnusedBits() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool getScalarizeGlobalBehavior() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasPermlane16Swap() const
bool hasFlatScratchSVSSwizzleBug() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasGWSSemaReleaseAll() const
bool hasAddNoCarry() const
bool hasSALUFloatInsts() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
Describe properties that are true of each instruction in the target description file.
const Triple & getTargetTriple() const
static MVT getIntegerVT(unsigned BitWidth)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDValue getRegister(Register Reg, EVT VT)
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
allnodes_const_iterator allnodes_begin() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
const TargetMachine & getTarget() const
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
unsigned getID() const
Return the register class ID number.
ArchType getArch() const
Get the parsed architecture type of this triple.
LLVM Value Representation.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Undef
Value of the register doesn't matter.
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isBoolSGPR(SDValue V)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
static SDNode * packConstantV2I16(const SDNode *N, SelectionDAG &DAG)
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG, ready for instruction scheduling.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Implement std::hash so that hash_code can be used in STL containers.
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.