30#include "llvm/IR/IntrinsicsAMDGPU.h"
34#ifdef EXPENSIVE_CHECKS
39#define DEBUG_TYPE "amdgpu-isel"
54 In = stripBitcast(In);
60 Out = In.getOperand(0);
71 if (ShiftAmt->getZExtValue() == 16) {
87 return In.getOperand(0);
92 if (Src.getValueType().getSizeInBits() == 32)
93 return stripBitcast(Src);
102 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
false)
106#ifdef EXPENSIVE_CHECKS
111 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
false)
127#ifdef EXPENSIVE_CHECKS
128 DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
129 LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
131 assert(L->isLCSSAForm(DT));
139bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
205#ifdef EXPENSIVE_CHECKS
214 MVT VT =
N->getValueType(0).getSimpleVT();
215 if (VT != MVT::v2i16 && VT != MVT::v2f16)
221 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(
Hi));
258 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(
Lo));
259 if (LdLo &&
Lo.hasOneUse()) {
298 bool MadeChange =
false;
304 switch (
N->getOpcode()) {
321bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
327 return TII->isInlineConstant(
C->getAPIntValue());
330 return TII->isInlineConstant(
C->getValueAPF());
340 unsigned OpNo)
const {
341 if (!
N->isMachineOpcode()) {
343 Register Reg = cast<RegisterSDNode>(
N->getOperand(1))->getReg();
344 if (
Reg.isVirtual()) {
346 return MRI.getRegClass(Reg);
350 =
static_cast<const GCNSubtarget *
>(Subtarget)->getRegisterInfo();
351 return TRI->getPhysRegBaseClass(Reg);
357 switch (
N->getMachineOpcode()) {
361 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
362 if (OpIdx >=
Desc.getNumOperands())
364 int RegClass =
Desc.operands()[OpIdx].RegClass;
370 case AMDGPU::REG_SEQUENCE: {
371 unsigned RCID =
N->getConstantOperandVal(0);
375 SDValue SubRegOp =
N->getOperand(OpNo + 1);
385 SmallVector <SDValue, 8> Ops;
387 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
398 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
401 return glueCopyToOp(
N,
M0,
M0.getValue(1));
404SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
405 unsigned AS = cast<MemSDNode>(
N)->getAddressSpace();
421 AMDGPU::S_MOV_B32,
DL, MVT::i32,
435 EVT VT =
N->getValueType(0);
441 if (NumVectorElts == 1) {
447 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
457 bool IsRegSeq =
true;
458 unsigned NOps =
N->getNumOperands();
459 for (
unsigned i = 0; i < NOps; i++) {
461 if (isa<RegisterSDNode>(
N->getOperand(i))) {
467 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
470 if (NOps != NumVectorElts) {
475 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
478 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
479 RegSeqArgs[1 + (2 * i) + 1] =
490 unsigned int Opc =
N->getOpcode();
491 if (
N->isMachineOpcode()) {
501 N = glueCopyToM0LDSInit(
N);
516 if (
N->getValueType(0) != MVT::i64)
519 SelectADD_SUB_I64(
N);
524 if (
N->getValueType(0) != MVT::i32)
531 SelectUADDO_USUBO(
N);
535 SelectFMUL_W_CHAIN(
N);
539 SelectFMA_W_CHAIN(
N);
545 EVT VT =
N->getValueType(0);
559 unsigned RegClassID =
567 if (
N->getValueType(0) == MVT::i128) {
571 }
else if (
N->getValueType(0) == MVT::i64) {
578 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
579 N->getOperand(1), SubReg1 };
581 N->getValueType(0), Ops));
587 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N))
592 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
597 Imm =
C->getZExtValue();
644 return SelectMUL_LOHI(
N);
655 if (
N->getValueType(0) != MVT::i32)
672 if (
N->getValueType(0) == MVT::i32) {
675 { N->getOperand(0), N->getOperand(1) });
683 SelectINTRINSIC_W_CHAIN(
N);
687 SelectINTRINSIC_WO_CHAIN(
N);
691 SelectINTRINSIC_VOID(
N);
695 SelectWAVE_ADDRESS(
N);
699 SelectSTACKRESTORE(
N);
707bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
710 return Term->getMetadata(
"amdgpu.uniform") ||
711 Term->getMetadata(
"structurizecfg.uniform");
714bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
715 unsigned ShAmtBits)
const {
718 const APInt &
RHS =
N->getConstantOperandAPInt(1);
719 if (
RHS.countr_one() >= ShAmtBits)
723 return (LHSKnownZeros | RHS).
countr_one() >= ShAmtBits;
749 N1 =
Lo.getOperand(1);
766 assert(LHS && RHS && isa<ConstantSDNode>(RHS));
774 return "AMDGPU DAG->DAG Pattern Instruction Selection";
791 if ((
C = dyn_cast<ConstantSDNode>(
Addr))) {
795 (
C = dyn_cast<ConstantSDNode>(
Addr.getOperand(0)))) {
799 (
C = dyn_cast<ConstantSDNode>(
Addr.getOperand(1)))) {
810SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
813 AMDGPU::S_MOV_B32,
DL, MVT::i32,
819void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(
SDNode *
N) {
824 unsigned Opcode =
N->getOpcode();
834 DL, MVT::i32, LHS, Sub0);
836 DL, MVT::i32, LHS, Sub1);
839 DL, MVT::i32, RHS, Sub0);
841 DL, MVT::i32, RHS, Sub1);
845 static const unsigned OpcMap[2][2][2] = {
846 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
847 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
848 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
849 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
851 unsigned Opc = OpcMap[0][
N->isDivergent()][IsAdd];
852 unsigned CarryOpc = OpcMap[1][
N->isDivergent()][IsAdd];
877 MVT::i64, RegSequenceArgs);
888void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
894 if (
N->isDivergent()) {
896 : AMDGPU::V_SUBB_U32_e64;
898 N, Opc,
N->getVTList(),
900 CurDAG->getTargetConstant(0, {}, MVT::i1) });
903 : AMDGPU::S_SUB_CO_PSEUDO;
904 CurDAG->SelectNodeTo(
N, Opc,
N->getVTList(), {
LHS,
RHS, CI});
908void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
913 bool IsVALU =
N->isDivergent();
917 if (UI.getUse().getResNo() == 1) {
926 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
929 N, Opc,
N->getVTList(),
930 {N->getOperand(0), N->getOperand(1),
931 CurDAG->getTargetConstant(0, {}, MVT::i1) });
933 unsigned Opc =
N->getOpcode() ==
ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
934 : AMDGPU::S_USUBO_PSEUDO;
936 CurDAG->SelectNodeTo(
N, Opc,
N->getVTList(),
937 {
N->getOperand(0),
N->getOperand(1)});
941void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
946 SelectVOP3Mods0(
N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
947 SelectVOP3Mods(
N->getOperand(2), Ops[3], Ops[2]);
948 SelectVOP3Mods(
N->getOperand(3), Ops[5], Ops[4]);
949 Ops[8] =
N->getOperand(0);
950 Ops[9] =
N->getOperand(4);
955 cast<ConstantSDNode>(Ops[0])->isZero() &&
956 cast<ConstantSDNode>(Ops[2])->isZero() &&
957 cast<ConstantSDNode>(Ops[4])->isZero();
958 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
962void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
967 SelectVOP3Mods0(
N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
968 SelectVOP3Mods(
N->getOperand(2), Ops[3], Ops[2]);
969 Ops[6] =
N->getOperand(0);
970 Ops[7] =
N->getOperand(3);
977void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
979 EVT VT =
N->getValueType(0);
981 assert(VT == MVT::f32 || VT == MVT::f64);
984 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
989 SelectVOP3BMods0(
N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
990 SelectVOP3BMods(
N->getOperand(1), Ops[3], Ops[2]);
991 SelectVOP3BMods(
N->getOperand(2), Ops[5], Ops[4]);
997void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1002 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1003 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1005 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1008 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1015void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1020 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1021 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1023 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1027 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
Zero, Clamp};
1032 MVT::i32,
SDValue(Mad, 0), Sub0);
1038 MVT::i32,
SDValue(Mad, 0), Sub1);
1073 int64_t ByteOffset =
C->getSExtValue();
1074 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1081 Zero,
Addr.getOperand(1));
1083 if (isDSOffsetLegal(Sub, ByteOffset)) {
1089 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1091 SubOp = AMDGPU::V_SUB_U32_e64;
1113 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1116 DL, MVT::i32, Zero);
1129bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1131 unsigned Size)
const {
1132 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1134 if (!isUInt<8>(Offset0 /
Size) || !isUInt<8>(Offset1 /
Size))
1149 Addr->getFlags().hasNoUnsignedWrap()) ||
1156bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1165 auto LHS =
Addr.getOperand(0);
1166 auto RHS =
Addr.getOperand(1);
1173 if (
Addr.getOpcode() ==
ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
1183bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1192 auto LHS =
Addr.getOperand(0);
1193 auto RHS =
Addr.getOperand(1);
1199bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1206 auto *RHSImm = cast<ConstantSDNode>(
Addr.getOperand(1));
1213 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1216 auto LHS =
Base.getOperand(0);
1217 auto RHS =
Base.getOperand(1);
1225 return SelectDSReadWrite2(
Addr,
Base, Offset0, Offset1, 4);
1231 return SelectDSReadWrite2(
Addr,
Base, Offset0, Offset1, 8);
1236 unsigned Size)
const {
1244 unsigned OffsetValue1 = OffsetValue0 +
Size;
1247 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1256 dyn_cast<ConstantSDNode>(
Addr.getOperand(0))) {
1257 unsigned OffsetValue0 =
C->getZExtValue();
1258 unsigned OffsetValue1 = OffsetValue0 +
Size;
1260 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1270 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1,
Size)) {
1274 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1276 SubOp = AMDGPU::V_SUB_U32_e64;
1292 unsigned OffsetValue0 = CAddr->getZExtValue();
1293 unsigned OffsetValue1 = OffsetValue0 +
Size;
1295 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1335 C1 = cast<ConstantSDNode>(
Addr.getOperand(1));
1337 N0 =
Addr.getOperand(0);
1396 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1412 if (!SelectMUBUF(
Addr,
Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1416 if (
C->getSExtValue()) {
1429std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1432 auto *FI = dyn_cast<FrameIndexSDNode>(
N);
1443bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1455 int64_t
Imm = CAddr->getSExtValue();
1456 const int64_t NullPtr =
1459 if (Imm != NullPtr) {
1464 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1465 VAddr =
SDValue(MovHighBits, 0);
1495 if (
TII->isLegalMUBUFImmOffset(C1) &&
1498 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1505 std::tie(VAddr, SOffset) = foldFrameIndex(
Addr);
1513 auto Reg = cast<RegisterSDNode>(Val.
getOperand(1))->getReg();
1514 if (!Reg.isPhysical())
1516 auto RC =
TRI.getPhysRegBaseClass(Reg);
1517 return RC &&
TRI.isSGPRClass(RC);
1520bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1543 CAddr = dyn_cast<ConstantSDNode>(
Addr.getOperand(1));
1549 SOffset =
Addr.getOperand(0);
1550 }
else if ((CAddr = dyn_cast<ConstantSDNode>(
Addr)) &&
1570 if (!SelectMUBUF(
Addr,
Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1573 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1574 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1575 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1589bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1596 SOffset = ByteOffsetNode;
1606 assert(isa<BuildVectorSDNode>(
N));
1617 int64_t OffsetVal = 0;
1621 bool CanHaveFlatSegmentOffsetBug =
1628 if (isBaseWithConstantOffset64(
Addr, N0, N1) &&
1630 isFlatScratchBaseLegal(
Addr))) {
1631 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1634 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1636 OffsetVal = COffsetVal;
1651 std::tie(OffsetVal, RemainderOffset) =
1652 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1655 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1658 if (
Addr.getValueType().getSizeInBits() == 32) {
1662 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1664 AddOp = AMDGPU::V_ADD_U32_e64;
1675 DL, MVT::i32, N0, Sub0);
1677 DL, MVT::i32, N0, Sub1);
1680 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1686 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1689 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1697 MVT::i64, RegSequenceArgs),
1724 return SelectFlatOffsetImpl(
N,
Addr, VAddr,
Offset,
1738bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
1743 int64_t ImmOffset = 0;
1749 if (isBaseWithConstantOffset64(
Addr, LHS, RHS)) {
1750 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1756 ImmOffset = COffsetVal;
1757 }
else if (!
LHS->isDivergent()) {
1758 if (COffsetVal > 0) {
1763 int64_t SplitImmOffset, RemainderOffset;
1764 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
1767 if (isUInt<32>(RemainderOffset)) {
1769 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1783 unsigned NumLiterals =
1784 !
TII->isInlineConstant(
APInt(32, COffsetVal & 0xffffffff)) +
1785 !
TII->isInlineConstant(
APInt(32, COffsetVal >> 32));
1796 if (!
LHS->isDivergent()) {
1804 if (!SAddr && !
RHS->isDivergent()) {
1819 isa<ConstantSDNode>(
Addr))
1834 if (
auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1837 isa<FrameIndexSDNode>(SAddr.
getOperand(0))) {
1840 auto FI = cast<FrameIndexSDNode>(SAddr.
getOperand(0));
1842 FI->getValueType(0));
1855 if (
Addr->isDivergent())
1860 int64_t COffsetVal = 0;
1863 COffsetVal = cast<ConstantSDNode>(
Addr.getOperand(1))->getSExtValue();
1864 SAddr =
Addr.getOperand(0);
1875 int64_t SplitImmOffset, RemainderOffset;
1876 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
1879 COffsetVal = SplitImmOffset;
1883 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
1884 :
CurDAG->getTargetConstant(RemainderOffset,
DL,
MVT::i32);
1896bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1911 return (VMax & 3) + (
SMax & 3) >= 4;
1917 int64_t ImmOffset = 0;
1921 if (isBaseWithConstantOffset64(
Addr, LHS, RHS)) {
1922 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1927 ImmOffset = COffsetVal;
1928 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
1932 int64_t SplitImmOffset, RemainderOffset;
1933 std::tie(SplitImmOffset, RemainderOffset)
1936 if (isUInt<32>(RemainderOffset)) {
1938 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1942 if (!isFlatScratchBaseLegal(
Addr))
1944 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1958 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
1961 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
1968 if (OrigAddr !=
Addr) {
1969 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
1972 if (!isFlatScratchBaseLegalSV(OrigAddr))
1976 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
1986bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDValue ByteOffsetNode,
1988 bool Imm32Only,
bool IsBuffer)
const {
1990 "Cannot match both soffset and offset at the same time!");
1998 *SOffset = ByteOffsetNode;
2010 SDLoc SL(ByteOffsetNode);
2014 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2015 std::optional<int64_t> EncodedOffset =
2017 if (EncodedOffset &&
Offset && !Imm32Only) {
2027 if (EncodedOffset &&
Offset && Imm32Only) {
2032 if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
2046 if (
Addr.getValueType() != MVT::i32)
2054 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2076 bool IsBuffer)
const {
2078 assert(!Imm32Only && !IsBuffer);
2080 return SelectSMRDBaseOffset(
Addr,
B,
nullptr,
Offset) &&
2081 SelectSMRDBaseOffset(
B, SBase, SOffset,
nullptr);
2087 !
Addr->getFlags().hasNoUnsignedWrap())
2093 N0 =
Addr.getOperand(0);
2094 N1 =
Addr.getOperand(1);
2096 assert(N0 && N1 && isa<ConstantSDNode>(N1));
2100 if (SelectSMRDOffset(N1, SOffset,
Offset, Imm32Only, IsBuffer)) {
2104 if (SelectSMRDOffset(N0, SOffset,
Offset, Imm32Only, IsBuffer)) {
2113 bool Imm32Only)
const {
2114 if (SelectSMRDBaseOffset(
Addr, SBase, SOffset,
Offset, Imm32Only)) {
2115 SBase = Expand32BitAddress(SBase);
2119 if (
Addr.getValueType() == MVT::i32 &&
Offset && !SOffset) {
2130 return SelectSMRD(
Addr, SBase,
nullptr, &
Offset);
2136 return SelectSMRD(
Addr, SBase,
nullptr, &
Offset,
2142 return SelectSMRD(
Addr, SBase, &SOffset,
nullptr);
2148 return SelectSMRD(
Addr, SBase, &SOffset, &
Offset);
2152 return SelectSMRDOffset(
N,
nullptr, &
Offset,
2156bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2159 return SelectSMRDOffset(
N,
nullptr, &
Offset,
2163bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2167 return N.getValueType() == MVT::i32 &&
2168 SelectSMRDBaseOffset(
N, SOffset,
nullptr,
2173bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2195 if (isa<ConstantSDNode>(
Index))
2203SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2207 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2213 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2223void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2228 const SDValue &Shl =
N->getOperand(0);
2236 if (0 < BVal && BVal <= CVal && CVal < 32) {
2246void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2247 switch (
N->getOpcode()) {
2249 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2252 const SDValue &Srl =
N->getOperand(0);
2256 if (Shift && Mask) {
2270 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2277 if (Shift && Mask) {
2288 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2289 SelectS_BFEFromShifts(
N);
2294 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2295 SelectS_BFEFromShifts(
N);
2306 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2310 unsigned Width = cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2320bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2322 if (!
N->hasOneUse())
2332 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2336 if (VT == MVT::i64) {
2357 auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
2361 auto Cond = VCMP.getOperand(0);
2373void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2376 if (
Cond.isUndef()) {
2378 N->getOperand(2),
N->getOperand(0));
2385 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2386 bool AndExec = !UseSCCBr;
2387 bool Negate =
false;
2392 auto CC = cast<CondCodeSDNode>(
Cond->getOperand(2))->get();
2396 VCMP.getValueType().getSizeInBits() ==
ST->getWavefrontSize()) {
2405 bool NegatedBallot =
false;
2408 UseSCCBr = !BallotCond->isDivergent();
2409 Negate = Negate ^ NegatedBallot;
2424 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2425 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2426 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2444 : AMDGPU::S_AND_B64,
2459void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2461 !
N->isDivergent()) {
2463 if (Src.getValueType() == MVT::f16) {
2475void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2478 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2479 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2493 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
2494 N = glueCopyToM0(
N, PtrBase);
2500 N = glueCopyToM0(
N,
Ptr);
2508 N->getOperand(
N->getNumOperands() - 1)
2517void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N) {
2518 unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2519 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
2520 N->getOperand(5),
N->getOperand(0)};
2530 case Intrinsic::amdgcn_ds_gws_init:
2531 return AMDGPU::DS_GWS_INIT;
2532 case Intrinsic::amdgcn_ds_gws_barrier:
2533 return AMDGPU::DS_GWS_BARRIER;
2534 case Intrinsic::amdgcn_ds_gws_sema_v:
2535 return AMDGPU::DS_GWS_SEMA_V;
2536 case Intrinsic::amdgcn_ds_gws_sema_br:
2537 return AMDGPU::DS_GWS_SEMA_BR;
2538 case Intrinsic::amdgcn_ds_gws_sema_p:
2539 return AMDGPU::DS_GWS_SEMA_P;
2540 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2541 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2547void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
2548 if (!Subtarget->
hasGWS() ||
2549 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2557 const bool HasVSrc =
N->getNumOperands() == 4;
2558 assert(HasVSrc ||
N->getNumOperands() == 3);
2561 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
2572 if (
ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2578 ImmOffset = ConstOffset->getZExtValue();
2596 glueCopyToM0(
N,
SDValue(M0Base, 0));
2613void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
2671void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
2672 unsigned IntrID =
N->getConstantOperandVal(1);
2674 case Intrinsic::amdgcn_ds_append:
2675 case Intrinsic::amdgcn_ds_consume: {
2676 if (
N->getValueType(0) != MVT::i32)
2678 SelectDSAppendConsume(
N, IntrID);
2681 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2682 SelectDSBvhStackIntrinsic(
N);
2689void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
2690 unsigned IntrID =
N->getConstantOperandVal(0);
2691 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
2692 SDNode *ConvGlueNode =
N->getGluedNode();
2699 MVT::Glue,
SDValue(ConvGlueNode, 0));
2701 ConvGlueNode =
nullptr;
2704 case Intrinsic::amdgcn_wqm:
2705 Opcode = AMDGPU::WQM;
2707 case Intrinsic::amdgcn_softwqm:
2708 Opcode = AMDGPU::SOFT_WQM;
2710 case Intrinsic::amdgcn_wwm:
2711 case Intrinsic::amdgcn_strict_wwm:
2712 Opcode = AMDGPU::STRICT_WWM;
2714 case Intrinsic::amdgcn_strict_wqm:
2715 Opcode = AMDGPU::STRICT_WQM;
2717 case Intrinsic::amdgcn_interp_p1_f16:
2718 SelectInterpP1F16(
N);
2720 case Intrinsic::amdgcn_inverse_ballot:
2721 switch (
N->getOperand(1).getValueSizeInBits()) {
2723 Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
2726 Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
2737 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
2744 NewOps.push_back(
SDValue(ConvGlueNode, 0));
2749void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
2750 unsigned IntrID =
N->getConstantOperandVal(1);
2752 case Intrinsic::amdgcn_ds_gws_init:
2753 case Intrinsic::amdgcn_ds_gws_barrier:
2754 case Intrinsic::amdgcn_ds_gws_sema_v:
2755 case Intrinsic::amdgcn_ds_gws_sema_br:
2756 case Intrinsic::amdgcn_ds_gws_sema_p:
2757 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2758 SelectDS_GWS(
N, IntrID);
2767void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
2771 {N->getOperand(0), Log2WaveSize});
2774void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
2791 if (
N->isDivergent()) {
2798 {SrcVal, Log2WaveSize}),
2806bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
2808 bool IsCanonicalizing,
2809 bool AllowAbs)
const {
2815 Src = Src.getOperand(0);
2816 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
2819 auto *
LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2820 if (LHS &&
LHS->isZero()) {
2822 Src = Src.getOperand(1);
2826 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
2828 Src = Src.getOperand(0);
2837 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
2846bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
2849 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
2858bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
2861 if (SelectVOP3ModsImpl(In, Src, Mods,
2871bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
2879bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
2883 if (SelectVOP3ModsImpl(In, Src, Mods,
2895bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
2897 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
2900bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
2902 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
2905bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
2912 return SelectVOP3Mods(In, Src, SrcMods);
2915bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
2922 return SelectVOP3BMods(In, Src, SrcMods);
2925bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
2936bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
2937 SDValue &SrcMods,
bool IsDOT)
const {
2944 Src = Src.getOperand(0);
2949 unsigned VecMods = Mods;
2951 SDValue Lo = stripBitcast(Src.getOperand(0));
2952 SDValue Hi = stripBitcast(Src.getOperand(1));
2955 Lo = stripBitcast(
Lo.getOperand(0));
2960 Hi = stripBitcast(
Hi.getOperand(0));
2970 unsigned VecSize = Src.getValueSizeInBits();
2971 Lo = stripExtractLoElt(
Lo);
2972 Hi = stripExtractLoElt(
Hi);
2974 if (
Lo.getValueSizeInBits() > VecSize) {
2976 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0,
SDLoc(In),
2980 if (
Hi.getValueSizeInBits() > VecSize) {
2982 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0,
SDLoc(In),
2986 assert(
Lo.getValueSizeInBits() <= VecSize &&
2987 Hi.getValueSizeInBits() <= VecSize);
2989 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
2993 if (VecSize == 32 || VecSize ==
Lo.getValueSizeInBits()) {
2996 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3001 Lo.getValueType()), 0);
3002 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3003 : AMDGPU::SReg_64RegClassID;
3010 Src.getValueType(), Ops), 0);
3016 if (VecSize == 64 &&
Lo ==
Hi && isa<ConstantFPSDNode>(
Lo)) {
3017 uint64_t Lit = cast<ConstantFPSDNode>(
Lo)->getValueAPF()
3018 .bitcastToAPInt().getZExtValue();
3036bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3038 return SelectVOP3PMods(In, Src, SrcMods,
true);
3041bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(
SDValue In,
SDValue &Src)
const {
3045 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3048 unsigned SrcSign =
C->getZExtValue();
3056bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3059 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3062 unsigned SrcVal =
C->getZExtValue();
3073 unsigned DstRegClass;
3075 switch (Elts.
size()) {
3077 DstRegClass = AMDGPU::VReg_256RegClassID;
3081 DstRegClass = AMDGPU::VReg_128RegClassID;
3085 DstRegClass = AMDGPU::VReg_64RegClassID;
3094 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3106 assert(
"unhandled Reg sequence size" &&
3107 (Elts.
size() == 8 || Elts.
size() == 16));
3111 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3112 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3120 {Elts[i + 1], Elts[i], PackLoLo});
3130 const SDLoc &
DL,
unsigned ElementSize) {
3131 if (ElementSize == 16)
3133 if (ElementSize == 32)
3141 unsigned ElementSize) {
3146 for (
auto El : Elts) {
3149 NegAbsElts.
push_back(El->getOperand(0));
3151 if (Elts.size() != NegAbsElts.
size()) {
3171 std::function<
bool(
SDValue)> ModifierCheck) {
3174 dyn_cast<BuildVectorSDNode>(stripBitcast(BV->
getOperand(i)))) {
3175 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3176 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3177 if (!ModifierCheck(ElF16))
3184bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3190 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3209 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3231bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3238 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3242 if (EltsF16.
empty())
3257 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3263 if (EltsV2F16.
empty())
3280bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
3286 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3290 unsigned ModOpcode =
3309bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
3310 if (
auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
3313 if (isInlineImmediate(
Splat.getNode())) {
3315 unsigned Imm =
C->getAPIntValue().getSExtValue();
3320 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
3329 SDValue SplatSrc32 = stripBitcast(In);
3330 if (
auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
3331 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3332 SDValue SplatSrc16 = stripBitcast(Splat32);
3333 if (
auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
3336 std::optional<APInt> RawValue;
3338 RawValue =
C->getValueAPF().bitcastToAPInt();
3340 RawValue =
C->getAPIntValue();
3342 if (RawValue.has_value()) {
3343 EVT VT =
In.getValueType().getScalarType();
3349 if (
TII->isInlineConstant(FloatVal)) {
3355 if (
TII->isInlineConstant(RawValue.value())) {
3369bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
3388bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
3407bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
3415bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
3418 return SelectVOP3Mods(In, Src, SrcMods);
3423bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
3424 unsigned &Mods)
const {
3426 SelectVOP3ModsImpl(In, Src, Mods);
3429 Src = Src.getOperand(0);
3430 assert(Src.getValueType() == MVT::f16);
3431 Src = stripBitcast(Src);
3437 SelectVOP3ModsImpl(Src, Src, ModsTmp);
3464bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
3467 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
3473bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
3476 SelectVOP3PMadMixModsImpl(In, Src, Mods);
3493 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
3503bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
3512 bool AllUsesAcceptSReg =
true;
3514 Limit < 10 &&
U != E; ++
U, ++Limit) {
3523 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
3524 AllUsesAcceptSReg =
false;
3526 if (
User->isMachineOpcode()) {
3527 unsigned Opc =
User->getMachineOpcode();
3529 if (
Desc.isCommutable()) {
3530 unsigned OpIdx =
Desc.getNumDefs() +
U.getOperandNo();
3533 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
3535 if (CommutedRC == &AMDGPU::VS_32RegClass ||
3536 CommutedRC == &AMDGPU::VS_64RegClass)
3537 AllUsesAcceptSReg =
true;
3545 if (!AllUsesAcceptSReg)
3549 return !AllUsesAcceptSReg && (Limit < 10);
3552bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
3553 auto Ld = cast<LoadSDNode>(
N);
3569 ->isMemOpHasNoClobberedMemOperand(
N)));
3575 bool IsModified =
false;
3582 SDNode *Node = &*Position++;
3588 if (ResNode != Node) {
3595 }
while (IsModified);
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static MachineSDNode * buildRegSequence32(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static SDValue matchZExtFromI32(SDValue Op)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static MachineSDNode * buildRegSequence(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL, unsigned ElementSize)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static MemSDNode * findMemSDNode(SDNode *N)
static bool isNoUnsignedWrap(SDValue Addr)
static MachineSDNode * buildRegSequence16(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< SDValue > &Elts, SDValue &Src, llvm::SelectionDAG *CurDAG, const SDLoc &DL, unsigned ElementSize)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
pre isel intrinsic Pre ISel Intrinsic Lowering
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
support::ulittle16_t & Lo
support::ulittle16_t & Hi
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPUDAGToDAGISel()=delete
bool matchLoadD16FromBuildVector(SDNode *N) const
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned getWavefrontSizeLog2() const
bool hasInv2PiInlineImm() const
static SDValue stripBitcast(SDValue Val)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
static bool EnableLateStructurizeCFG
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
uint64_t getZExtValue() const
Get zero extended value.
unsigned countr_one() const
Count the number of trailing one bits.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
int getLDSBankCount() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool unsafeDSOffsetFoldingEnabled() const
bool hasFlatInstOffsets() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool privateMemoryResourceIsRangeChecked() const
bool hasSignedScratchOffsets() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasDOTOpSelHazard() const
bool d16PreservesUnusedBits() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool getScalarizeGlobalBehavior() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasFlatScratchSVSSwizzleBug() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasGWSSemaReleaseAll() const
bool hasAddNoCarry() const
bool hasSALUFloatInsts() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
Describe properties that are true of each instruction in the target description file.
const Triple & getTargetTriple() const
static MVT getIntegerVT(unsigned BitWidth)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
allnodes_const_iterator allnodes_begin() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
const TargetMachine & getTarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
unsigned getID() const
Return the register class ID number.
ArchType getArch() const
Get the parsed architecture type of this triple.
LLVM Value Representation.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr const char32_t SBase
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isBoolSGPR(SDValue V)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
unsigned M0(unsigned Val)
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
static unsigned getSubRegFromChannel(unsigned Channel)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.