#include "llvm/IR/IntrinsicsAMDGPU.h"

#ifdef EXPENSIVE_CHECKS

#define DEBUG_TYPE "amdgpu-isel"

In = stripBitcast(In);
Out = In.getOperand(0);
if (ShiftAmt->getZExtValue() == 16) {
return In.getOperand(0);
if (Src.getValueType().getSizeInBits() == 32)
  return stripBitcast(Src);

"AMDGPU DAG->DAG Pattern Instruction Selection", false,
#ifdef EXPENSIVE_CHECKS
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
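// The two string/bool argument lines above belong to the INITIALIZE_PASS_BEGIN /
// INITIALIZE_PASS_END invocations that register AMDGPUDAGToDAGISelLegacy under
// "amdgpu-isel"; only the argument lines of the macros survived extraction here.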
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {

#ifdef EXPENSIVE_CHECKS
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
assert(L->isLCSSAForm(DT));

#ifdef EXPENSIVE_CHECKS

MVT VT = N->getValueType(0).getSimpleVT();
if (VT != MVT::v2i16 && VT != MVT::v2f16)
LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
if (LdLo && Lo.hasOneUse()) {
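// The fragments above appear to come from matchLoadD16FromBuildVector: for a
// v2i16/v2f16 build_vector, a load feeding the high (LdHi) or low (LdLo)
// element can be folded into a d16 load when the target preserves the unused
// 16-bit half of the destination register.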
bool MadeChange = false;
switch (N->getOpcode()) {

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
return TII->isInlineConstant(C->getAPIntValue());
return TII->isInlineConstant(C->getValueAPF());
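// isInlineImmediate defers to SIInstrInfo::isInlineConstant for both integer
// (ConstantSDNode) and floating-point (ConstantFPSDNode) operands, so a single
// query covers every constant form that can be encoded as an inline immediate.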
unsigned OpNo) const {
if (!N->isMachineOpcode()) {
Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
if (Reg.isVirtual()) {
return MRI.getRegClass(Reg);
return TRI->getPhysRegBaseClass(Reg);

switch (N->getMachineOpcode()) {
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
int RegClass = Desc.operands()[OpIdx].RegClass;

case AMDGPU::REG_SEQUENCE: {
unsigned RCID = N->getConstantOperandVal(0);
SDValue SubRegOp = N->getOperand(OpNo + 1);

SmallVector<SDValue, 8> Ops;
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)

assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
return glueCopyToOp(N, M0, M0.getValue(1));
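// glueCopyToOp wires the freshly created CopyToReg of M0 into N through its
// chain and glue values, so the M0 write stays immediately adjacent to the
// instruction that reads it during scheduling.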
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
AMDGPU::S_MOV_B32, DL, MVT::i32,
AMDGPU::S_MOV_B32, DL, MVT::i32,

EVT VT = N->getValueType(0);
if (NumVectorElts == 1) {
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "

bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
if (isa<RegisterSDNode>(N->getOperand(i))) {
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
if (NOps != NumVectorElts) {
for (unsigned i = NOps; i < NumVectorElts; ++i) {
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
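// SelectBuildVector assembles a REG_SEQUENCE: RegSeqArgs[0] holds the register
// class, and each element occupies a (value, subregister-index) pair at slots
// 1 + 2*i and 2 + 2*i. When the BUILD_VECTOR has fewer operands than
// NumVectorElts, the remaining value slots are padded with an IMPLICIT_DEF.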
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
N = glueCopyToM0LDSInit(N);

if (N->getValueType(0) != MVT::i64)
SelectADD_SUB_I64(N);
if (N->getValueType(0) != MVT::i32)
SelectUADDO_USUBO(N);
SelectFMUL_W_CHAIN(N);
SelectFMA_W_CHAIN(N);

EVT VT = N->getValueType(0);
unsigned RegClassID =
if (N->getValueType(0) == MVT::i128) {
} else if (N->getValueType(0) == MVT::i64) {
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                        N->getOperand(1), SubReg1 };
N->getValueType(0), Ops));

if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
Imm = C->getZExtValue();

return SelectMUL_LOHI(N);
if (N->getValueType(0) != MVT::i32)
if (N->getValueType(0) == MVT::i32) {
{ N->getOperand(0), N->getOperand(1) });

SelectINTRINSIC_W_CHAIN(N);
SelectINTRINSIC_WO_CHAIN(N);
SelectINTRINSIC_VOID(N);
SelectWAVE_ADDRESS(N);
SelectSTACKRESTORE(N);

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
return Term->getMetadata("amdgpu.uniform") ||
       Term->getMetadata("structurizecfg.uniform");

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
const APInt &RHS = N->getConstantOperandAPInt(1);
if (RHS.countr_one() >= ShAmtBits)
return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
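// The AND mask is unneeded when it keeps at least ShAmtBits low bits: either
// the constant RHS already has that many trailing ones, or combining it with
// the bits of the LHS known to be zero reaches that count, so the masking
// cannot change the value actually consumed as a shift amount.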
N1 = Lo.getOperand(1);
assert(LHS && RHS && isa<ConstantSDNode>(RHS));

return "AMDGPU DAG->DAG Pattern Instruction Selection";

#ifdef EXPENSIVE_CHECKS
for (auto &L : LI.getLoopsInPreorder())
  assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");

if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
AMDGPU::S_MOV_B32, DL, MVT::i32,

void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
unsigned Opcode = N->getOpcode();
DL, MVT::i32, LHS, Sub0);
DL, MVT::i32, LHS, Sub1);
DL, MVT::i32, RHS, Sub0);
DL, MVT::i32, RHS, Sub1);

static const unsigned OpcMap[2][2][2] = {
    {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
     {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
    {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
     {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
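// OpcMap is indexed as OpcMap[IsCarryPart][N->isDivergent()][IsAdd]: the first
// index picks the low-half add/sub versus the carry-consuming high-half opcode,
// the second chooses scalar (SALU) versus vector (VALU) instructions based on
// divergence, and the third picks subtraction versus addition.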
unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
MVT::i64, RegSequenceArgs);

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
if (N->isDivergent()) {
    : AMDGPU::V_SUBB_U32_e64;
N, Opc, N->getVTList(),
CurDAG->getTargetConstant(0, {}, MVT::i1) });
    : AMDGPU::S_SUB_CO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
bool IsVALU = N->isDivergent();
if (UI.getUse().getResNo() == 1) {
unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
N, Opc, N->getVTList(),
{N->getOperand(0), N->getOperand(1),
 CurDAG->getTargetConstant(0, {}, MVT::i1) });
unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                            : AMDGPU::S_USUBO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                     {N->getOperand(0), N->getOperand(1)});
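// UADDO/USUBO selection also follows divergence: divergent nodes become
// V_ADD_CO_U32_e64 / V_SUB_CO_U32_e64 with an explicit zero i1 clamp operand,
// while uniform nodes use the S_UADDO_PSEUDO / S_USUBO_PSEUDO scalar pseudos
// that are expanded later.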
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
cast<ConstantSDNode>(Ops[0])->isZero() &&
cast<ConstantSDNode>(Ops[2])->isZero() &&
cast<ConstantSDNode>(Ops[4])->isZero();
unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
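// V_FMAC_F32 ties its addend to the destination register, and this code only
// selects it when all of the source-modifier operands gathered above are zero
// (the full condition in the surrounding source may include further subtarget
// checks); otherwise the three-address V_FMA_F32_e64 form is used.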
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
Ops[6] = N->getOperand(0);
Ops[7] = N->getOperand(3);

void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
EVT VT = N->getValueType(0);
assert(VT == MVT::f32 || VT == MVT::f64);
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);

void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
             : AMDGPU::V_MAD_U64_U32_gfx11_e64;
Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),

void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
             : AMDGPU::V_MAD_U64_U32_gfx11_e64;
Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
MVT::i32, SDValue(Mad, 0), Sub0);
MVT::i32, SDValue(Mad, 0), Sub1);

int64_t ByteOffset = C->getSExtValue();
if (isDSOffsetLegal(SDValue(), ByteOffset)) {
Zero, Addr.getOperand(1));
if (isDSOffsetLegal(Sub, ByteOffset)) {
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
SubOp = AMDGPU::V_SUB_U32_e64;
if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
DL, MVT::i32, Zero);

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Size) const {
if (Offset0 % Size != 0 || Offset1 % Size != 0)
if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
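// ds_read2/ds_write2 encode two 8-bit offsets in units of the element size, so
// a pair of offsets is only legal when both are multiples of Size and both
// still fit in 8 bits after dividing by Size.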
Addr->getFlags().hasNoUnsignedWrap()) ||

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
auto LHS = Addr.getOperand(0);
auto RHS = Addr.getOperand(1);
if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
auto LHS = Addr.getOperand(0);
auto RHS = Addr.getOperand(1);

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
(RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
auto LHS = Base.getOperand(0);
auto RHS = Base.getOperand(1);

return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);

unsigned Size) const {
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
unsigned OffsetValue0 = C->getZExtValue();
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
SubOp = AMDGPU::V_SUB_U32_e64;
unsigned OffsetValue0 = CAddr->getZExtValue();
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

C1 = cast<ConstantSDNode>(Addr.getOperand(1));
N0 = Addr.getOperand(0);
AMDGPU::S_MOV_B32, DL, MVT::i32,

if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
if (C->getSExtValue()) {

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
auto *FI = dyn_cast<FrameIndexSDNode>(N);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
int64_t Imm = CAddr->getSExtValue();
const int64_t NullPtr =
if (Imm != NullPtr) {
AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
if (TII->isLegalMUBUFImmOffset(C1) &&
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
std::tie(VAddr, SOffset) = foldFrameIndex(Addr);

auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
if (!Reg.isPhysical())
const auto *RC = TRI.getPhysRegBaseClass(Reg);
return RC && TRI.isSGPRClass(RC);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
SOffset = Addr.getOperand(0);
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&

if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
    !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
    !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
maskTrailingOnes<uint64_t>(32);
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
SOffset = ByteOffsetNode;

assert(isa<BuildVectorSDNode>(N));

int64_t OffsetVal = 0;
bool CanHaveFlatSegmentOffsetBug =
if (isBaseWithConstantOffset64(Addr, N0, N1) &&
    isFlatScratchBaseLegal(Addr))) {
int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
OffsetVal = COffsetVal;
std::tie(OffsetVal, RemainderOffset) =
    TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
if (Addr.getValueType().getSizeInBits() == 32) {
unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
AddOp = AMDGPU::V_ADD_U32_e64;
DL, MVT::i32, N0, Sub0);
DL, MVT::i32, N0, Sub1);
getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
{AddOffsetLo, SDValue(N0Lo, 0), Clamp});
AMDGPU::V_ADDC_U32_e64, DL, VTs,
MVT::i64, RegSequenceArgs),

return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,

bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
int64_t ImmOffset = 0;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
ImmOffset = COffsetVal;
} else if (!LHS->isDivergent()) {
if (COffsetVal > 0) {
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
if (isUInt<32>(RemainderOffset)) {
AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
unsigned NumLiterals =
if (!LHS->isDivergent()) {
if (!SAddr && !RHS->isDivergent()) {
isa<ConstantSDNode>(Addr))

if (auto *FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
auto *FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
FI->getValueType(0));

if (Addr->isDivergent())
int64_t COffsetVal = 0;
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
SAddr = Addr.getOperand(0);
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
COffsetVal = SplitImmOffset;
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
: CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);

bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
return (VMax & 3) + (SMax & 3) >= 4;
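// checkFlatScratchSVSSwizzleBug works from the known-bits maxima of the VGPR
// addend (VMax) and the SGPR addend plus immediate (SMax): the SVS scratch
// access is rejected when the sum of their low two bits can reach 4, i.e. when
// the low bits of the two addends could carry, which is the case the affected
// hardware presumably swizzles incorrectly.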
int64_t ImmOffset = 0;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
ImmOffset = COffsetVal;
} else if (!LHS->isDivergent() && COffsetVal > 0) {
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset)
if (isUInt<32>(RemainderOffset)) {
AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
if (!isFlatScratchBaseLegal(Addr))
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))

if (!LHS->isDivergent() && RHS->isDivergent()) {
} else if (!RHS->isDivergent() && LHS->isDivergent()) {
if (OrigAddr != Addr) {
if (!isFlatScratchBaseLegalSVImm(OrigAddr))
if (!isFlatScratchBaseLegalSV(OrigAddr))
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))

bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     int64_t ImmOffset) const {
if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          bool Imm32Only, bool IsBuffer,
                                          int64_t ImmOffset) const {
"Cannot match both soffset and offset at the same time!");
*SOffset = ByteOffsetNode;
return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
SDLoc SL(ByteOffsetNode);
int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
*Subtarget, ByteOffset, IsBuffer, HasSOffset);
if (EncodedOffset && Offset && !Imm32Only) {
if (EncodedOffset && Offset && Imm32Only) {
if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))

if (Addr.getValueType() != MVT::i32)
unsigned AddrHiVal = Info->get32BitAddressHighBits();

bool Imm32Only, bool IsBuffer,
int64_t ImmOffset) const {
assert(!Imm32Only && !IsBuffer);
if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
ImmOff = C->getSExtValue();
return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
!Addr->getFlags().hasNoUnsignedWrap())
N0 = Addr.getOperand(0);
N1 = Addr.getOperand(1);
assert(N0 && N1 && isa<ConstantSDNode>(N1));
if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
bool Imm32Only) const {
if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
SBase = Expand32BitAddress(SBase);
if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {

return SelectSMRD(Addr, SBase, nullptr, &Offset);
return SelectSMRD(Addr, SBase, nullptr, &Offset,
return SelectSMRD(Addr, SBase, &SOffset, nullptr);
return SelectSMRD(Addr, SBase, &SOffset, &Offset);
return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
return N.getValueType() == MVT::i32 &&
       SelectSMRDBaseOffset(N, SOffset, nullptr,

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
if (isa<ConstantSDNode>(Index))

SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
const SDValue &Shl = N->getOperand(0);
if (0 < BVal && BVal <= CVal && CVal < 32) {

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
switch (N->getOpcode()) {
if (N->getOperand(0).getOpcode() == ISD::SRL) {
const SDValue &Srl = N->getOperand(0);
if (Shift && Mask) {
if (N->getOperand(0).getOpcode() == ISD::AND) {
if (Shift && Mask) {
} else if (N->getOperand(0).getOpcode() == ISD::SHL) {
SelectS_BFEFromShifts(N);
if (N->getOperand(0).getOpcode() == ISD::SHL) {
SelectS_BFEFromShifts(N);
const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
if (!N->hasOneUse())
MVT VT = Cond.getOperand(0).getSimpleValueType();
if (VT == MVT::i64) {

auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
auto Cond = VCMP.getOperand(0);

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
if (Cond.isUndef()) {
N->getOperand(2), N->getOperand(0));
bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
bool AndExec = !UseSCCBr;
bool Negate = false;
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
bool NegatedBallot = false;
UseSCCBr = !BallotCond->isDivergent();
Negate = Negate ^ NegatedBallot;
UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
         : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
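// A provably uniform branch keeps its condition in SCC and selects a scalar
// S_CBRANCH_SCC0/SCC1; otherwise the divergent condition is materialized in
// VCC (ANDed with EXEC when AndExec is set) and S_CBRANCH_VCCZ/VCCNZ is used,
// with the zero/non-zero form chosen by the Negate flag.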
Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,

void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
    !N->isDivergent()) {
if (Src.getValueType() == MVT::f16) {

void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ? AMDGPU::DS_APPEND
                                                     : AMDGPU::DS_CONSUME;
if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
N = glueCopyToM0(N, PtrBase);
N = glueCopyToM0(N, Ptr);
N->getOperand(N->getNumOperands() - 1)

void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                 N->getOperand(5), N->getOperand(0)};

case Intrinsic::amdgcn_ds_gws_init:
  return AMDGPU::DS_GWS_INIT;
case Intrinsic::amdgcn_ds_gws_barrier:
  return AMDGPU::DS_GWS_BARRIER;
case Intrinsic::amdgcn_ds_gws_sema_v:
  return AMDGPU::DS_GWS_SEMA_V;
case Intrinsic::amdgcn_ds_gws_sema_br:
  return AMDGPU::DS_GWS_SEMA_BR;
case Intrinsic::amdgcn_ds_gws_sema_p:
  return AMDGPU::DS_GWS_SEMA_P;
case Intrinsic::amdgcn_ds_gws_sema_release_all:
  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
if (!Subtarget->hasGWS() ||
    (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
const bool HasVSrc = N->getNumOperands() == 4;
assert(HasVSrc || N->getNumOperands() == 3);
SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
ImmOffset = ConstOffset->getZExtValue();
glueCopyToM0(N, SDValue(M0Base, 0));

void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(1);
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
if (N->getValueType(0) != MVT::i32)
SelectDSAppendConsume(N, IntrID);
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
case Intrinsic::amdgcn_init_whole_wave:
->setInitWholeWave();

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(0);
unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
SDNode *ConvGlueNode = N->getGluedNode();
MVT::Glue, SDValue(ConvGlueNode, 0));
ConvGlueNode = nullptr;
case Intrinsic::amdgcn_wqm:
  Opcode = AMDGPU::WQM;
case Intrinsic::amdgcn_softwqm:
  Opcode = AMDGPU::SOFT_WQM;
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_strict_wwm:
  Opcode = AMDGPU::STRICT_WWM;
case Intrinsic::amdgcn_strict_wqm:
  Opcode = AMDGPU::STRICT_WQM;
case Intrinsic::amdgcn_interp_p1_f16:
  SelectInterpP1F16(N);
case Intrinsic::amdgcn_permlane16_swap:
case Intrinsic::amdgcn_permlane32_swap: {
if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
    (IntrID == Intrinsic::amdgcn_permlane32_swap &&
Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
             ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
             : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
NewOps.push_back(SDValue(ConvGlueNode, 0));
bool FI = N->getConstantOperandVal(3);
if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
NewOps.push_back(SDValue(ConvGlueNode, 0));

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(1);
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
case Intrinsic::amdgcn_ds_gws_sema_br:
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all:
SelectDS_GWS(N, IntrID);

void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
{N->getOperand(0), Log2WaveSize});
void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
if (N->isDivergent()) {
{SrcVal, Log2WaveSize}),

bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
Src = Src.getOperand(0);
} else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
if (LHS && LHS->isZero()) {
Src = Src.getOperand(1);
if (AllowAbs && Src.getOpcode() == ISD::FABS) {
Src = Src.getOperand(0);

if (SelectVOP3ModsImpl(In, Src, Mods, true,

bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
if (SelectVOP3ModsImpl(In, Src, Mods, false,

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
return SelectVINTERPModsImpl(In, Src, SrcMods, false);

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
return SelectVINTERPModsImpl(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
return SelectVOP3BMods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
Src = Src.getOperand(0);
unsigned VecMods = Mods;
SDValue Lo = stripBitcast(Src.getOperand(0));
SDValue Hi = stripBitcast(Src.getOperand(1));
Lo = stripBitcast(Lo.getOperand(0));
Hi = stripBitcast(Hi.getOperand(0));
unsigned VecSize = Src.getValueSizeInBits();
Lo = stripExtractLoElt(Lo);
Hi = stripExtractLoElt(Hi);
if (Lo.getValueSizeInBits() > VecSize) {
(VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
if (Hi.getValueSizeInBits() > VecSize) {
(VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
assert(Lo.getValueSizeInBits() <= VecSize &&
       Hi.getValueSizeInBits() <= VecSize);
if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
Lo.getValueType()), 0);
auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                            : AMDGPU::SReg_64RegClassID;
Src.getValueType(), Ops), 0);
if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                   .bitcastToAPInt().getZExtValue();
bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
unsigned SrcSign = C->getZExtValue();

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
unsigned SrcVal = C->getZExtValue();

unsigned DstRegClass;
switch (Elts.size()) {
DstRegClass = AMDGPU::VReg_256RegClassID;
DstRegClass = AMDGPU::VReg_128RegClassID;
DstRegClass = AMDGPU::VReg_64RegClassID;
for (unsigned i = 0; i < Elts.size(); ++i) {

assert("unhandled Reg sequence size" &&
       (Elts.size() == 8 || Elts.size() == 16));
for (unsigned i = 0; i < Elts.size(); i += 2) {
SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
{Elts[i + 1], Elts[i], PackLoLo});

const SDLoc &DL, unsigned ElementSize) {
if (ElementSize == 16)
if (ElementSize == 32)

unsigned ElementSize) {
for (auto El : Elts) {
NegAbsElts.push_back(El->getOperand(0));
if (Elts.size() != NegAbsElts.size()) {

std::function<bool(SDValue)> ModifierCheck) {
dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
if (!ModifierCheck(ElF16))

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
if (EltsF16.empty())
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
if (EltsV2F16.empty())

bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
unsigned ModOpcode =

bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
if (isInlineImmediate(Splat.getNode())) {
unsigned Imm = C->getAPIntValue().getSExtValue();
unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();

SDValue SplatSrc32 = stripBitcast(In);
if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
SDValue SplatSrc16 = stripBitcast(Splat32);
if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
std::optional<APInt> RawValue;
RawValue = C->getValueAPF().bitcastToAPInt();
RawValue = C->getAPIntValue();
if (RawValue.has_value()) {
EVT VT = In.getValueType().getScalarType();
if (TII->isInlineConstant(FloatVal)) {
if (TII->isInlineConstant(RawValue.value())) {

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
SelectVOP3ModsImpl(In, Src, Mods);
Src = Src.getOperand(0);
assert(Src.getValueType() == MVT::f16);
Src = stripBitcast(Src);
SelectVOP3ModsImpl(Src, Src, ModsTmp);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
SelectVOP3PMadMixModsImpl(In, Src, Mods);

unsigned NumOpcodes = 0;
const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->isAllOnes()) {
for (unsigned I = 0; I < Src.size(); ++I) {
if (Src.size() == 3) {
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isAllOnes()) {
for (unsigned I = 0; I < Src.size(); ++I) {
if (Src[I] == LHS) {
Bits = SrcBits[Src.size()];
switch (In.getOpcode()) {
if (!getOperandBits(LHS, LHSBits) ||
    !getOperandBits(RHS, RHSBits)) {
  return std::make_pair(0, 0);
NumOpcodes += Op.first;
LHSBits = Op.second;
NumOpcodes += Op.first;
RHSBits = Op.second;
return std::make_pair(0, 0);
switch (In.getOpcode()) {
TTbl = LHSBits & RHSBits;
TTbl = LHSBits | RHSBits;
TTbl = LHSBits ^ RHSBits;
return std::make_pair(NumOpcodes + 1, TTbl);
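// BitOp3_Op folds a tree of AND/OR/XOR nodes into a single bitop3-style truth
// table: each distinct source value is assigned the canonical 8-bit pattern
// 0xf0, 0xcc or 0xaa from SrcBits, the per-operand tables are combined with
// the node's own logical opcode, and the pair (opcode count, truth table) is
// returned so the caller can decide whether emitting a BITOP3 is worthwhile.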
unsigned NumOpcodes;
std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
if (NumOpcodes < 2 || Src.empty())
if (NumOpcodes < 4 && !In->isDivergent())
if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
(In.getOperand(0).getOpcode() == In.getOpcode() ||
 In.getOperand(1).getOpcode() == In.getOpcode()))
while (Src.size() < 3)
  Src.push_back(Src[0]);

C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);

bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
bool AllUsesAcceptSReg = true;
Limit < 10 && U != E; ++U, ++Limit) {
getOperandRegClass(U->getUser(), U->getOperandNo());
if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
AllUsesAcceptSReg = false;
if (User->isMachineOpcode()) {
unsigned Opc = User->getMachineOpcode();
if (Desc.isCommutable()) {
unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
getOperandRegClass(U->getUser(), CommutedOpNo);
if (CommutedRC == &AMDGPU::VS_32RegClass ||
    CommutedRC == &AMDGPU::VS_64RegClass)
  AllUsesAcceptSReg = true;
if (!AllUsesAcceptSReg)
return !AllUsesAcceptSReg && (Limit < 10);
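// A constant is only reported as a VGPR immediate when the bounded scan of its
// users (at most 10) completed and found at least one use that cannot take an
// SGPR/VS operand even after considering commutable operand positions;
// otherwise keeping the value in an SGPR is preferred.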
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
const auto *Ld = cast<LoadSDNode>(N);
->isMemOpHasNoClobberedMemOperand(N)));

bool IsModified = false;
SDNode *Node = &*Position++;
if (ResNode != Node) {
} while (IsModified);