30#include "llvm/IR/IntrinsicsAMDGPU.h"
34#ifdef EXPENSIVE_CHECKS
39#define DEBUG_TYPE "amdgpu-isel"
54 In = stripBitcast(In);
60 Out = In.getOperand(0);
71 if (ShiftAmt->getZExtValue() == 16) {
87 return In.getOperand(0);
92 if (Src.getValueType().getSizeInBits() == 32)
93 return stripBitcast(Src);
102 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
107#ifdef EXPENSIVE_CHECKS
112 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
135bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
199#ifdef EXPENSIVE_CHECKS
200 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
201 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
203 assert(L->isLCSSAForm(DT));
212#ifdef EXPENSIVE_CHECKS
221  MVT VT = N->getValueType(0).getSimpleVT();
222  if (VT != MVT::v2i16 && VT != MVT::v2f16)
228  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
265  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
266  if (LdLo && Lo.hasOneUse()) {
305  bool MadeChange = false;
311  switch (N->getOpcode()) {
328  bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
334    return TII->isInlineConstant(C->getAPIntValue());
337    return TII->isInlineConstant(C->getValueAPF());
347    unsigned OpNo) const {
348  if (!N->isMachineOpcode()) {
350    Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
351    if (Reg.isVirtual()) {
353      return MRI.getRegClass(Reg);
357    = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
358    return TRI->getPhysRegBaseClass(Reg);
364  switch (N->getMachineOpcode()) {
368    unsigned OpIdx = Desc.getNumDefs() + OpNo;
369    if (OpIdx >= Desc.getNumOperands())
371    int RegClass = Desc.operands()[OpIdx].RegClass;
377  case AMDGPU::REG_SEQUENCE: {
378    unsigned RCID = N->getConstantOperandVal(0);
382    SDValue SubRegOp = N->getOperand(OpNo + 1);
392  SmallVector<SDValue, 8> Ops;
394  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
405  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
408  return glueCopyToOp(N, M0, M0.getValue(1));
411  SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
412  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
428    AMDGPU::S_MOV_B32, DL, MVT::i32,
442  EVT VT = N->getValueType(0);
448  if (NumVectorElts == 1) {
454  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
464  bool IsRegSeq = true;
465  unsigned NOps = N->getNumOperands();
466  for (unsigned i = 0; i < NOps; i++) {
468    if (isa<RegisterSDNode>(N->getOperand(i))) {
474    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
477  if (NOps != NumVectorElts) {
482    for (unsigned i = NOps; i < NumVectorElts; ++i) {
485      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
486 RegSeqArgs[1 + (2 * i) + 1] =
497  unsigned int Opc = N->getOpcode();
498  if (N->isMachineOpcode()) {
506  N = glueCopyToM0LDSInit(N);
521    if (N->getValueType(0) != MVT::i64)
524    SelectADD_SUB_I64(N);
529    if (N->getValueType(0) != MVT::i32)
536    SelectUADDO_USUBO(N);
540    SelectFMUL_W_CHAIN(N);
544    SelectFMA_W_CHAIN(N);
550    EVT VT = N->getValueType(0);
564    unsigned RegClassID =
572    if (N->getValueType(0) == MVT::i128) {
576    } else if (N->getValueType(0) == MVT::i64) {
583      const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
584                              N->getOperand(1), SubReg1 };
586                              N->getValueType(0), Ops));
592    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
597      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
602      Imm = C->getZExtValue();
649    return SelectMUL_LOHI(N);
660    if (N->getValueType(0) != MVT::i32)
677    if (N->getValueType(0) == MVT::i32) {
680      { N->getOperand(0), N->getOperand(1) });
688    SelectINTRINSIC_W_CHAIN(N);
692    SelectINTRINSIC_WO_CHAIN(N);
696    SelectINTRINSIC_VOID(N);
700    SelectWAVE_ADDRESS(N);
704    SelectSTACKRESTORE(N);
712  bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
715    return Term->getMetadata("amdgpu.uniform") ||
716           Term->getMetadata("structurizecfg.uniform");
719  bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
720                                               unsigned ShAmtBits) const {
723    const APInt &RHS = N->getConstantOperandAPInt(1);
724    if (RHS.countr_one() >= ShAmtBits)
728    return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
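The two early-outs above can be restated without SelectionDAG types: an AND mask on a shift amount is redundant when every bit the shift actually consumes is either kept by the mask or already known to be zero in the other operand. A minimal standalone sketch of that rule (plain integers standing in for KnownBits; the helper name is made up for illustration):

// Standalone sketch, not LLVM code: the mask on a shift amount is unneeded
// when the low ShAmtBits bits are all covered by (known-zero LHS bits | mask).
#include <cassert>
#include <cstdint>

static bool maskIsUnneeded(uint32_t KnownZeroLHS, uint32_t Mask,
                           unsigned ShAmtBits) {
  uint32_t Low = (ShAmtBits >= 32) ? ~0u : ((1u << ShAmtBits) - 1);
  return ((KnownZeroLHS | Mask) & Low) == Low; // i.e. countr_one() >= ShAmtBits
}

int main() {
  // A 32-bit shift only reads the low 5 bits of its amount (ShAmtBits == 5).
  assert(maskIsUnneeded(/*KnownZeroLHS=*/0, /*Mask=*/0x1f, /*ShAmtBits=*/5));
  assert(!maskIsUnneeded(/*KnownZeroLHS=*/0, /*Mask=*/0x0f, /*ShAmtBits=*/5));
  return 0;
}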
754      N1 = Lo.getOperand(1);
771    assert(LHS && RHS && isa<ConstantSDNode>(RHS));
779    return "AMDGPU DAG->DAG Pattern Instruction Selection";
789  #ifdef EXPENSIVE_CHECKS
795    for (auto &L : LI.getLoopsInPreorder())
796      assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
815    if ((C = dyn_cast<ConstantSDNode>(Addr))) {
819               (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
823               (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
834SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
837    AMDGPU::S_MOV_B32, DL, MVT::i32,
843  void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
848    unsigned Opcode = N->getOpcode();
858      DL, MVT::i32, LHS, Sub0);
860      DL, MVT::i32, LHS, Sub1);
863      DL, MVT::i32, RHS, Sub0);
865      DL, MVT::i32, RHS, Sub1);
869    static const unsigned OpcMap[2][2][2] = {
870        {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
871         {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
872        {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
873         {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
875    unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
876    unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
901 MVT::i64, RegSequenceArgs);
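The OpcMap table and the REG_SEQUENCE above implement the usual lo/hi split for a 64-bit add or sub. As plain arithmetic (a sketch of the decomposition only, not of the generated machine nodes):

// Sketch: 64-bit add via sub0/sub1 halves, a low add that produces a carry,
// and a high add that consumes it - the pattern SelectADD_SUB_I64 emits with
// S_ADD_U32/S_ADDC_U32 or V_ADD_CO_U32/V_ADDC_U32.
#include <cassert>
#include <cstdint>

static uint64_t add64ViaHalves(uint64_t LHS, uint64_t RHS) {
  uint32_t LoL = uint32_t(LHS), HiL = uint32_t(LHS >> 32); // sub0, sub1
  uint32_t LoR = uint32_t(RHS), HiR = uint32_t(RHS >> 32);
  uint32_t Lo = LoL + LoR;
  uint32_t Carry = Lo < LoL;        // carry-out of the low add
  uint32_t Hi = HiL + HiR + Carry;  // add-with-carry on the high halves
  return (uint64_t(Hi) << 32) | Lo; // REG_SEQUENCE of the two results
}

int main() {
  assert(add64ViaHalves(0xffffffffull, 1) == 0x100000000ull);
  assert(add64ViaHalves(0x123456789abcdef0ull, 0x0fedcba987654321ull) ==
         0x123456789abcdef0ull + 0x0fedcba987654321ull);
  return 0;
}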
912  void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
918    if (N->isDivergent()) {
920      : AMDGPU::V_SUBB_U32_e64;
922      N, Opc, N->getVTList(),
924      CurDAG->getTargetConstant(0, {}, MVT::i1) });
927      : AMDGPU::S_SUB_CO_PSEUDO;
928      CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
932  void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
937    bool IsVALU = N->isDivergent();
941      if (UI.getUse().getResNo() == 1) {
950    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
953      N, Opc, N->getVTList(),
954      {N->getOperand(0), N->getOperand(1),
955       CurDAG->getTargetConstant(0, {}, MVT::i1) });
957    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
958                                                : AMDGPU::S_USUBO_PSEUDO;
960    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
961                         {N->getOperand(0), N->getOperand(1)});
965  void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
970    SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
971    SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
972    SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
973    Ops[8] = N->getOperand(0);
974    Ops[9] = N->getOperand(4);
979      cast<ConstantSDNode>(Ops[0])->isZero() &&
980      cast<ConstantSDNode>(Ops[2])->isZero() &&
981      cast<ConstantSDNode>(Ops[4])->isZero();
982    unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
986  void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
991    SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
992    SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
993    Ops[6] = N->getOperand(0);
994    Ops[7] = N->getOperand(3);
1001 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
1003   EVT VT = N->getValueType(0);
1005   assert(VT == MVT::f32 || VT == MVT::f64);
1008   = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1013   SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1014   SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1015   SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
1021 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1026   Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1027                : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1029   Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1032   SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
1039 void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1044   Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1045                : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1047   Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1051   SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1056     MVT::i32, SDValue(Mad, 0), Sub0);
1062     MVT::i32, SDValue(Mad, 0), Sub1);
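SelectMUL_LOHI above feeds a zero addend into the 64-bit MAD and then extracts sub0/sub1 of the result. Numerically that is just the identity below (plain arithmetic, not the selected nodes):

// mul_lohi(a, b) == the two 32-bit halves of a*b + 0 computed in 64 bits.
#include <cassert>
#include <cstdint>

struct LoHi { uint32_t Lo, Hi; };

static LoHi mulLoHiViaMad64(uint32_t A, uint32_t B) {
  uint64_t Mad = uint64_t(A) * uint64_t(B) + 0; // V_MAD_U64_U32 with addend 0
  return {uint32_t(Mad), uint32_t(Mad >> 32)};  // sub0, sub1
}

int main() {
  uint64_t Full = uint64_t(0x87654321u) * 0x12345678u;
  LoHi R = mulLoHiViaMad64(0x87654321u, 0x12345678u);
  assert(R.Lo == uint32_t(Full) && R.Hi == uint32_t(Full >> 32));
  return 0;
}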
1097   int64_t ByteOffset = C->getSExtValue();
1098   if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1105     Zero, Addr.getOperand(1));
1107   if (isDSOffsetLegal(Sub, ByteOffset)) {
1113     unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1115       SubOp = AMDGPU::V_SUB_U32_e64;
1137   if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
1140     DL, MVT::i32, Zero);
1153 bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1155                                           unsigned Size) const {
1156   if (Offset0 % Size != 0 || Offset1 % Size != 0)
1158   if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
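Lines 1156 and 1158 spell out the read2/write2 offset rule checked here: each offset must be a multiple of the access size, and after scaling by that size it must still fit the instruction's unsigned 8-bit offset field. A standalone restatement, assuming those two conditions are the whole of the check shown:

// Sketch of the ds_read2/ds_write2 offset legality rule from the lines above.
#include <cassert>

static bool dsOffset2Legal(unsigned Offset0, unsigned Offset1, unsigned Size) {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  return (Offset0 / Size) <= 255 && (Offset1 / Size) <= 255; // isUInt<8>
}

int main() {
  assert(dsOffset2Legal(0, 4, 4));        // offsets 0 and 1 in units of 4
  assert(dsOffset2Legal(1016, 1020, 4));  // 254 and 255 after scaling
  assert(!dsOffset2Legal(1024, 1028, 4)); // 256 no longer fits in 8 bits
  assert(!dsOffset2Legal(2, 4, 4));       // misaligned first offset
  return 0;
}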
1173 Addr->getFlags().hasNoUnsignedWrap()) ||
1180 bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
1189   auto LHS = Addr.getOperand(0);
1190   auto RHS = Addr.getOperand(1);
1197   if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
1207 bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
1216   auto LHS = Addr.getOperand(0);
1217   auto RHS = Addr.getOperand(1);
1223 bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
1230   auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
1237   (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1240   auto LHS = Base.getOperand(0);
1241   auto RHS = Base.getOperand(1);
1249   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1255   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1260                                             unsigned Size) const {
1268   unsigned OffsetValue1 = OffsetValue0 + Size;
1271   if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
1280     dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1281   unsigned OffsetValue0 = C->getZExtValue();
1282   unsigned OffsetValue1 = OffsetValue0 + Size;
1284   if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1294     if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1298       unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1300         SubOp = AMDGPU::V_SUB_U32_e64;
1318   unsigned OffsetValue0 = CAddr->getZExtValue();
1319   unsigned OffsetValue1 = OffsetValue0 + Size;
1321   if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1361   C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1363   N0 = Addr.getOperand(0);
1422     AMDGPU::S_MOV_B32, DL, MVT::i32,
1438   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1442   if (C->getSExtValue()) {
1455 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1458   auto *FI = dyn_cast<FrameIndexSDNode>(N);
1469 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1481   int64_t Imm = CAddr->getSExtValue();
1482   const int64_t NullPtr =
1485   if (Imm != NullPtr) {
1490     AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1491   VAddr = SDValue(MovHighBits, 0);
1521   if (TII->isLegalMUBUFImmOffset(C1) &&
1524     std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1531   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1539   auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1540   if (!Reg.isPhysical())
1542   auto RC = TRI.getPhysRegBaseClass(Reg);
1543   return RC && TRI.isSGPRClass(RC);
1546 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1569   CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1575   SOffset = Addr.getOperand(0);
1576   } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1596   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1599   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1600       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1601       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1615 bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
1622   SOffset = ByteOffsetNode;
1632   assert(isa<BuildVectorSDNode>(N));
1643   int64_t OffsetVal = 0;
1647   bool CanHaveFlatSegmentOffsetBug =
1654   if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1656        isFlatScratchBaseLegal(Addr))) {
1657   int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1660   if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1662     OffsetVal = COffsetVal;
1677   std::tie(OffsetVal, RemainderOffset) =
1678       TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1681       getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1684   if (Addr.getValueType().getSizeInBits() == 32) {
1688     unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1690       AddOp = AMDGPU::V_ADD_U32_e64;
1701     DL, MVT::i32, N0, Sub0);
1703     DL, MVT::i32, N0, Sub1);
1706     getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1712     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1715     AMDGPU::V_ADDC_U32_e64, DL, VTs,
1723     MVT::i64, RegSequenceArgs),
1750   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1764 bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1769   int64_t ImmOffset = 0;
1775   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1776     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1782     ImmOffset = COffsetVal;
1783     } else if (!LHS->isDivergent()) {
1784       if (COffsetVal > 0) {
1789         int64_t SplitImmOffset, RemainderOffset;
1790         std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1793         if (isUInt<32>(RemainderOffset)) {
1795           AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1809   unsigned NumLiterals =
1810       !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1811       !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1822   if (!LHS->isDivergent()) {
1830   if (!SAddr && !RHS->isDivergent()) {
1845     isa<ConstantSDNode>(Addr))
1860   if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1863     isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1866     auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1868     FI->getValueType(0));
1881   if (Addr->isDivergent())
1886   int64_t COffsetVal = 0;
1889     COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1890     SAddr = Addr.getOperand(0);
1901   int64_t SplitImmOffset, RemainderOffset;
1902   std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1905   COffsetVal = SplitImmOffset;
1909       ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1910       : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1922 bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1936   return (VMax & 3) + (SMax & 3) >= 4;
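A hedged reading of the final comparison: it asks whether the largest possible low-two-bit contributions of the VGPR and SGPR addends can sum past 3, i.e. whether the add may carry out of bits [1:0]. Illustrated with plain numbers:

// Sketch of the check only: (VMax & 3) + (SMax & 3) >= 4 means the low two
// bits of the two addends can produce a carry into bit 2.
#include <cassert>
static bool lowTwoBitsMayCarry(unsigned VMax, unsigned SMax) {
  return (VMax & 3) + (SMax & 3) >= 4;
}
int main() {
  assert(lowTwoBitsMayCarry(3, 1));  // 3 + 1 == 4: a carry is possible
  assert(!lowTwoBitsMayCarry(2, 1)); // 2 + 1 == 3: never carries
  return 0;
}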
1942   int64_t ImmOffset = 0;
1946   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1947     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1952     ImmOffset = COffsetVal;
1953     } else if (!LHS->isDivergent() && COffsetVal > 0) {
1957       int64_t SplitImmOffset, RemainderOffset;
1958       std::tie(SplitImmOffset, RemainderOffset)
1961       if (isUInt<32>(RemainderOffset)) {
1963         AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1967   if (!isFlatScratchBaseLegal(Addr))
1969   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1983   if (!LHS->isDivergent() && RHS->isDivergent()) {
1986   } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1993   if (OrigAddr != Addr) {
1994     if (!isFlatScratchBaseLegalSVImm(OrigAddr))
1997     if (!isFlatScratchBaseLegalSV(OrigAddr))
2001   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2011 bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
2014                                                      int64_t ImmOffset) const {
2015   if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2028 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
2030                                           bool Imm32Only, bool IsBuffer,
2032                                           int64_t ImmOffset) const {
2034     "Cannot match both soffset and offset at the same time!");
2043   *SOffset = ByteOffsetNode;
2044   return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2050   return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2057   SDLoc SL(ByteOffsetNode);
2061   int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
2063       *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2064   if (EncodedOffset && Offset && !Imm32Only) {
2074   if (EncodedOffset && Offset && Imm32Only) {
2079   if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
2093   if (Addr.getValueType() != MVT::i32)
2101   unsigned AddrHiVal = Info->get32BitAddressHighBits();
2122                                            bool Imm32Only, bool IsBuffer,
2124                                            int64_t ImmOffset) const {
2126   assert(!Imm32Only && !IsBuffer);
2129   if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
2134     ImmOff = C->getSExtValue();
2136   return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
2143       !Addr->getFlags().hasNoUnsignedWrap())
2149   N0 = Addr.getOperand(0);
2150   N1 = Addr.getOperand(1);
2152   assert(N0 && N1 && isa<ConstantSDNode>(N1));
2157   if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2162   if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2172                                  bool Imm32Only) const {
2173   if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2174     SBase = Expand32BitAddress(SBase);
2178   if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
2189   return SelectSMRD(Addr, SBase, nullptr, &Offset);
2195   return SelectSMRD(Addr, SBase, nullptr, &Offset,
2201   return SelectSMRD(Addr, SBase, &SOffset, nullptr);
2207   return SelectSMRD(Addr, SBase, &SOffset, &Offset);
2211   return SelectSMRDOffset(N, nullptr, &Offset,
2215 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
2218   return SelectSMRDOffset(N, nullptr, &Offset,
2222 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2226   return N.getValueType() == MVT::i32 &&
2227          SelectSMRDBaseOffset(N, SOffset, nullptr,
2232 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2254   if (isa<ConstantSDNode>(Index))
2262 SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2266   unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2272   unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2282 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2287   const SDValue &Shl = N->getOperand(0);
2295 if (0 < BVal && BVal <= CVal && CVal < 32) {
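The guard 0 < BVal && BVal <= CVal && CVal < 32 is the usual shift-pair form of a signed bitfield extract: for 32-bit x, (x << B) followed by an arithmetic >> C equals the field of width 32 - C taken at bit offset C - B, sign-extended. A small self-check of that identity (plain C++, not the selection code):

// Identity behind SelectS_BFEFromShifts: (x << B) >>arith C == sbfe(x, C-B, 32-C).
#include <cassert>
#include <cstdint>

static int32_t sbfe(int32_t X, unsigned Offset, unsigned Width) {
  assert(Width >= 1 && Width <= 31 && Offset + Width <= 32);
  uint32_t Field = ((uint32_t)X >> Offset) & ((1u << Width) - 1);
  int64_t V = Field;
  if (Field & (1u << (Width - 1)))
    V -= (int64_t)1 << Width; // sign-extend the Width-bit field
  return (int32_t)V;
}

int main() {
  for (unsigned B = 1; B < 8; ++B)
    for (unsigned C = B; C < 32; ++C) {
      int32_t X = 0x5ee1c0de;
      int32_t ViaShifts = (int32_t)((uint32_t)X << B) >> C; // arithmetic >>
      assert(ViaShifts == sbfe(X, C - B, 32 - C));
    }
  return 0;
}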
2305 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2306   switch (N->getOpcode()) {
2308     if (N->getOperand(0).getOpcode() == ISD::SRL) {
2311       const SDValue &Srl = N->getOperand(0);
2315       if (Shift && Mask) {
2329     if (N->getOperand(0).getOpcode() == ISD::AND) {
2336       if (Shift && Mask) {
2347     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2348       SelectS_BFEFromShifts(N);
2353     if (N->getOperand(0).getOpcode() == ISD::SHL) {
2354       SelectS_BFEFromShifts(N);
2365     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2369     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2379 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2381   if (!N->hasOneUse())
2391   MVT VT = Cond.getOperand(0).getSimpleValueType();
2395   if (VT == MVT::i64) {
2416   auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
2420   auto Cond = VCMP.getOperand(0);
2432 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2435   if (Cond.isUndef()) {
2437     N->getOperand(2), N->getOperand(0));
2444   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2445   bool AndExec = !UseSCCBr;
2446   bool Negate = false;
2451   auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
2455   VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
2464   bool NegatedBallot = false;
2467   UseSCCBr = !BallotCond->isDivergent();
2468   Negate = Negate ^ NegatedBallot;
2483   UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2484            : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2485   Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2503   : AMDGPU::S_AND_B64,
2518 void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
2520   !N->isDivergent()) {
2522   if (Src.getValueType() == MVT::f16) {
2534 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2537   unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2538     AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2552   if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2553     N = glueCopyToM0(N, PtrBase);
2559   N = glueCopyToM0(N, Ptr);
2567   N->getOperand(N->getNumOperands() - 1)
2576 void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2577   unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2578   SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2579                    N->getOperand(5), N->getOperand(0)};
2589   case Intrinsic::amdgcn_ds_gws_init:
2590     return AMDGPU::DS_GWS_INIT;
2591   case Intrinsic::amdgcn_ds_gws_barrier:
2592     return AMDGPU::DS_GWS_BARRIER;
2593   case Intrinsic::amdgcn_ds_gws_sema_v:
2594     return AMDGPU::DS_GWS_SEMA_V;
2595   case Intrinsic::amdgcn_ds_gws_sema_br:
2596     return AMDGPU::DS_GWS_SEMA_BR;
2597   case Intrinsic::amdgcn_ds_gws_sema_p:
2598     return AMDGPU::DS_GWS_SEMA_P;
2599   case Intrinsic::amdgcn_ds_gws_sema_release_all:
2600     return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2606 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2607   if (!Subtarget->hasGWS() ||
2608       (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2616   const bool HasVSrc = N->getNumOperands() == 4;
2617   assert(HasVSrc || N->getNumOperands() == 3);
2620   SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2631   if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2637     ImmOffset = ConstOffset->getZExtValue();
2655   glueCopyToM0(N, SDValue(M0Base, 0));
2672 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2730 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2731   unsigned IntrID = N->getConstantOperandVal(1);
2733   case Intrinsic::amdgcn_ds_append:
2734   case Intrinsic::amdgcn_ds_consume: {
2735     if (N->getValueType(0) != MVT::i32)
2737     SelectDSAppendConsume(N, IntrID);
2740   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2741     SelectDSBvhStackIntrinsic(N);
2748 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2749   unsigned IntrID = N->getConstantOperandVal(0);
2750   unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
2751   SDNode *ConvGlueNode = N->getGluedNode();
2758     MVT::Glue, SDValue(ConvGlueNode, 0));
2760     ConvGlueNode = nullptr;
2763   case Intrinsic::amdgcn_wqm:
2764     Opcode = AMDGPU::WQM;
2766   case Intrinsic::amdgcn_softwqm:
2767     Opcode = AMDGPU::SOFT_WQM;
2769   case Intrinsic::amdgcn_wwm:
2770   case Intrinsic::amdgcn_strict_wwm:
2771     Opcode = AMDGPU::STRICT_WWM;
2773   case Intrinsic::amdgcn_strict_wqm:
2774     Opcode = AMDGPU::STRICT_WQM;
2776   case Intrinsic::amdgcn_interp_p1_f16:
2777     SelectInterpP1F16(N);
2784   if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
2791     NewOps.push_back(SDValue(ConvGlueNode, 0));
2796 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2797   unsigned IntrID = N->getConstantOperandVal(1);
2799   case Intrinsic::amdgcn_ds_gws_init:
2800   case Intrinsic::amdgcn_ds_gws_barrier:
2801   case Intrinsic::amdgcn_ds_gws_sema_v:
2802   case Intrinsic::amdgcn_ds_gws_sema_br:
2803   case Intrinsic::amdgcn_ds_gws_sema_p:
2804   case Intrinsic::amdgcn_ds_gws_sema_release_all:
2805     SelectDS_GWS(N, IntrID);
2814 void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
2818   {N->getOperand(0), Log2WaveSize});
2821 void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
2838   if (N->isDivergent()) {
2845   {SrcVal, Log2WaveSize}),
2853 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2855                                             bool IsCanonicalizing,
2856                                             bool AllowAbs) const {
2862     Src = Src.getOperand(0);
2863   } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
2866     auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2867     if (LHS && LHS->isZero()) {
2869       Src = Src.getOperand(1);
2873   if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2875     Src = Src.getOperand(0);
2884   if (SelectVOP3ModsImpl(In, Src, Mods, true,
2893 bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
2896   if (SelectVOP3ModsImpl(In, Src, Mods, false,
2905 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2908   if (SelectVOP3ModsImpl(In, Src, Mods,
2918 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2926 bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
2930   if (SelectVOP3ModsImpl(In, Src, Mods,
2942 bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
2944   return SelectVINTERPModsImpl(In, Src, SrcMods, false);
2947 bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
2949   return SelectVINTERPModsImpl(In, Src, SrcMods, true);
2952 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2959   return SelectVOP3Mods(In, Src, SrcMods);
2962 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2969   return SelectVOP3BMods(In, Src, SrcMods);
2972 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2983 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2984                                          SDValue &SrcMods, bool IsDOT) const {
2991   Src = Src.getOperand(0);
2996   unsigned VecMods = Mods;
2998   SDValue Lo = stripBitcast(Src.getOperand(0));
2999   SDValue Hi = stripBitcast(Src.getOperand(1));
3002     Lo = stripBitcast(Lo.getOperand(0));
3007     Hi = stripBitcast(Hi.getOperand(0));
3017   unsigned VecSize = Src.getValueSizeInBits();
3018   Lo = stripExtractLoElt(Lo);
3019   Hi = stripExtractLoElt(Hi);
3021   if (Lo.getValueSizeInBits() > VecSize) {
3023     (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3027   if (Hi.getValueSizeInBits() > VecSize) {
3029     (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3033   assert(Lo.getValueSizeInBits() <= VecSize &&
3034          Hi.getValueSizeInBits() <= VecSize);
3036   if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
3040     if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
3043       assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
3048       Lo.getValueType()), 0);
3049     auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3050                                 : AMDGPU::SReg_64RegClassID;
3057     Src.getValueType(), Ops), 0);
3063   if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
3064     uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
3065                        .bitcastToAPInt().getZExtValue();
3083 bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
3085   return SelectVOP3PMods(In, Src, SrcMods, true);
3088 bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
3092     assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3095     unsigned SrcSign = C->getZExtValue();
3103 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
3106     assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3109     unsigned SrcVal = C->getZExtValue();
3120   unsigned DstRegClass;
3122   switch (Elts.size()) {
3124     DstRegClass = AMDGPU::VReg_256RegClassID;
3128     DstRegClass = AMDGPU::VReg_128RegClassID;
3132     DstRegClass = AMDGPU::VReg_64RegClassID;
3141   for (unsigned i = 0; i < Elts.size(); ++i) {
3153   assert("unhandled Reg sequence size" &&
3154          (Elts.size() == 8 || Elts.size() == 16));
3158   for (unsigned i = 0; i < Elts.size(); i += 2) {
3159     SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3167     {Elts[i + 1], Elts[i], PackLoLo});
3177                                   const SDLoc &DL, unsigned ElementSize) {
3178   if (ElementSize == 16)
3180   if (ElementSize == 32)
3188                                   unsigned ElementSize) {
3193   for (auto El : Elts) {
3196     NegAbsElts.push_back(El->getOperand(0));
3198   if (Elts.size() != NegAbsElts.size()) {
3218     std::function<bool(SDValue)> ModifierCheck) {
3221     dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
3222     for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3223       SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3224       if (!ModifierCheck(ElF16))
3231 bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
3237   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3256   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3278 bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
3285   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3289     if (EltsF16.empty())
3304   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3310     if (EltsV2F16.empty())
3327 bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
3333   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3337     unsigned ModOpcode =
3356 bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
3357   if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
3360     if (isInlineImmediate(Splat.getNode())) {
3362       unsigned Imm = C->getAPIntValue().getSExtValue();
3367       unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
3376   SDValue SplatSrc32 = stripBitcast(In);
3377   if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
3378     if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3379       SDValue SplatSrc16 = stripBitcast(Splat32);
3380       if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
3383         std::optional<APInt> RawValue;
3385           RawValue = C->getValueAPF().bitcastToAPInt();
3387           RawValue = C->getAPIntValue();
3389         if (RawValue.has_value()) {
3390           EVT VT = In.getValueType().getScalarType();
3396           if (TII->isInlineConstant(FloatVal)) {
3402           if (TII->isInlineConstant(RawValue.value())) {
3416 bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
3435 bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
3454 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
3462 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
3465   return SelectVOP3Mods(In, Src, SrcMods);
3470 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
3471                                                    unsigned &Mods) const {
3473   SelectVOP3ModsImpl(In, Src, Mods);
3476   Src = Src.getOperand(0);
3477   assert(Src.getValueType() == MVT::f16);
3478   Src = stripBitcast(Src);
3484   SelectVOP3ModsImpl(Src, Src, ModsTmp);
3511 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
3514   if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
3520 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
3523   SelectVOP3PMadMixModsImpl(In, Src, Mods);
3540     C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
3550 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
3559   bool AllUsesAcceptSReg = true;
3561        Limit < 10 && U != E; ++U, ++Limit) {
3570     if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
3571       AllUsesAcceptSReg = false;
3573       if (User->isMachineOpcode()) {
3574         unsigned Opc = User->getMachineOpcode();
3576         if (Desc.isCommutable()) {
3577           unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
3580             unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
3582             if (CommutedRC == &AMDGPU::VS_32RegClass ||
3583                 CommutedRC == &AMDGPU::VS_64RegClass)
3584               AllUsesAcceptSReg = true;
3592   if (!AllUsesAcceptSReg)
3596   return !AllUsesAcceptSReg && (Limit < 10);
3599 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
3600   auto Ld = cast<LoadSDNode>(N);
3616       ->isMemOpHasNoClobberedMemOperand(N)));
3622   bool IsModified = false;
3629   SDNode *Node = &*Position++;
3635   if (ResNode != Node) {
3642   } while (IsModified);
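PostprocessISelDAG above (lines 3622-3642) walks every node and re-runs the peephole until one full sweep makes no change. The same fixpoint control flow, reduced to a vector of integers (a sketch of the loop structure only, not of the DAG peephole itself):

// Fixpoint loop in the style of PostprocessISelDAG: repeat a local rewrite
// over every element until a whole pass leaves everything unchanged.
#include <cassert>
#include <vector>

static void runUntilFixpoint(std::vector<int> &Nodes) {
  bool IsModified;
  do {
    IsModified = false;
    for (int &N : Nodes) {
      int Rewritten = (N % 2 != 0) ? N + 1 : N; // stand-in for the peephole
      if (Rewritten != N) {
        N = Rewritten;
        IsModified = true; // something changed, so run another sweep
      }
    }
  } while (IsModified);
}

int main() {
  std::vector<int> V{1, 2, 3, 8};
  runUntilFixpoint(V);
  for (int N : V)
    assert(N % 2 == 0);
  return 0;
}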
unsigned const MachineRegisterInfo * MRI
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static MachineSDNode * buildRegSequence32(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static SDValue matchZExtFromI32(SDValue Op)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static MemSDNode * findMemSDNode(SDNode *N)
static MachineSDNode * buildRegSequence16(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one Idx.
unsigned const TargetRegisterInfo * TRI
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned getWavefrontSizeLog2() const
bool hasInv2PiInlineImm() const
static SDValue stripBitcast(SDValue Val)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
static bool EnableLateStructurizeCFG
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
uint64_t getZExtValue() const
Get zero extended value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
int getLDSBankCount() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool unsafeDSOffsetFoldingEnabled() const
bool hasFlatInstOffsets() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool privateMemoryResourceIsRangeChecked() const
bool hasSignedScratchOffsets() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasDOTOpSelHazard() const
bool d16PreservesUnusedBits() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool getScalarizeGlobalBehavior() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasFlatScratchSVSSwizzleBug() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasGWSSemaReleaseAll() const
bool hasAddNoCarry() const
bool hasSALUFloatInsts() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
Describe properties that are true of each instruction in the target description file.
const Triple & getTargetTriple() const
static MVT getIntegerVT(unsigned BitWidth)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
allnodes_const_iterator allnodes_begin() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
const TargetMachine & getTarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
unsigned getID() const
Return the register class ID number.
ArchType getArch() const
Get the parsed architecture type of this triple.
LLVM Value Representation.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Undef
Value of the register doesn't matter.
constexpr const char32_t SBase
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isBoolSGPR(SDValue V)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
static SDNode * packConstantV2I16(const SDNode *N, SelectionDAG &DAG)
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
unsigned M0(unsigned Val)
Implement std::hash so that hash_code can be used in STL containers.
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.