#include "llvm/IR/IntrinsicsAMDGPU.h"

#ifdef EXPENSIVE_CHECKS

#define DEBUG_TYPE "amdgpu-isel"

  In = stripBitcast(In);

  Out = In.getOperand(0);

  if (ShiftAmt->getZExtValue() == 16) {

  if (Idx->isZero() && In.getValueSizeInBits() <= 32)
    return In.getOperand(0);

  if (Src.getValueType().getSizeInBits() == 32)
    return stripBitcast(Src);

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                      false)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                    false)

                                        CodeGenOpt::Level OptLevel) {

#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    assert(L->isLCSSAForm(DT));
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {

#ifdef EXPENSIVE_CHECKS

  MVT VT = N->getValueType(0).getSimpleVT();

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {

  bool MadeChange = false;

    switch (N->getOpcode()) {

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {

      return TII->isInlineConstant(-C->getAPIntValue());

      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

      return TII->isInlineConstant(C->getAPIntValue());

      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
    if (Reg.isVirtual()) {
      return MRI.getRegClass(Reg);

    const SIRegisterInfo *TRI =
        static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
    return TRI->getPhysRegBaseClass(Reg);

  switch (N->getMachineOpcode()) {

    int RegClass = Desc.operands()[OpIdx].RegClass;

  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();

  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
  return glueCopyToOp(N, M0, M0.getValue(1));

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();

  EVT VT = N->getValueType(0);

  if (NumVectorElts == 1) {

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");

  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {

    if (isa<RegisterSDNode>(N->getOperand(i))) {

    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);

  if (NOps != NumVectorElts) {

    for (unsigned i = NOps; i < NumVectorElts; ++i) {

      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {

  N = glueCopyToM0LDSInit(N);

    SelectADD_SUB_I64(N);

    SelectUADDO_USUBO(N);

    SelectFMUL_W_CHAIN(N);

    SelectFMA_W_CHAIN(N);

    EVT VT = N->getValueType(0);

    unsigned RegClassID =

    } else if (N->getValueType(0) == MVT::i64) {

      const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                              N->getOperand(1), SubReg1 };

                                          N->getValueType(0), Ops));

    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))

      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();

      Imm = C->getZExtValue();

    uint32_t WidthVal = Width->getZExtValue();

    return SelectMUL_LOHI(N);

                          { N->getOperand(0), N->getOperand(1) });

    SelectINTRINSIC_W_CHAIN(N);

    SelectINTRINSIC_WO_CHAIN(N);

    SelectINTRINSIC_VOID(N);
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {

  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {

  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  if (RHS.countr_one() >= ShAmtBits)

  return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
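  // The mask is redundant when its low ShAmtBits bits are all ones, either
  // literally in the constant or once the known-zero bits of the other
  // operand are folded in: the shift only consumes ShAmtBits bits of the
  // amount, so an AND that preserves those bits can be dropped.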
    N1 = Lo.getOperand(1);

  assert(LHS && RHS && isa<ConstantSDNode>(RHS));

  return "AMDGPU DAG->DAG Pattern Instruction Selection";

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {

      (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {

      (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,

void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {

  unsigned Opcode = N->getOpcode();

  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
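  // OpcMap is indexed as [consumes carry][is divergent][is add]: the first
  // axis picks the low-half add/sub that produces the carry versus the
  // high-half variant that consumes it, and the second axis picks the scalar
  // or VALU form depending on whether the node is divergent.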
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;

        N, Opc, N->getVTList(),
        CurDAG->getTargetConstant(0, {}, MVT::i1) });

    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {

  bool IsVALU = N->isDivergent();

    if (UI.getUse().getResNo() == 1) {

    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) });

    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

      cast<ConstantSDNode>(Ops[0])->isZero() &&
      cast<ConstantSDNode>(Ops[2])->isZero() &&
      cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {

  EVT VT = N->getValueType(0);

      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),

void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
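  // MUL_LOHI is selected as the same 64-bit MAD with a zero addend: one
  // instruction produces the full 64-bit product, and the mul_lo / mul_hi
  // results can then be taken from its low and high 32-bit halves.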
    int64_t ByteOffset = C->getSExtValue();
    if (isDSOffsetLegal(SDValue(), ByteOffset)) {

                                     Zero, Addr.getOperand(1));

      if (isDSOffsetLegal(Sub, ByteOffset)) {

        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;

          SubOp = AMDGPU::V_SUB_U32_e64;

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)

  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
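  // The two-address DS forms (read2/write2) encode each offset as an 8-bit
  // count of Size-byte elements, so both byte offsets must be multiples of
  // Size and fit in 8 bits once scaled down.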
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,

  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);

  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);

                                              unsigned Size) const {

    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {

          dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
    unsigned OffsetValue0 = C->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

      if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {

        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;

          SubOp = AMDGPU::V_SUB_U32_e64;

    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));

      N0 = Addr.getOperand(0);

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))

    if (C->getSExtValue()) {

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {

  auto *FI = dyn_cast<FrameIndexSDNode>(N);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,

    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);

    if (Imm != NullPtr) {

          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      std::tie(VAddr, SOffset) = foldFrameIndex(N0);

  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);

  auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
  if (!Reg.isPhysical())

  auto RC = TRI.getPhysRegBaseClass(Reg);
  return RC && TRI.isSGPRClass(RC);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,

    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {

  assert(isa<BuildVectorSDNode>(N));
  int64_t OffsetVal = 0;

  bool CanHaveFlatSegmentOffsetBug =

  if (isBaseWithConstantOffset64(Addr, N0, N1) &&
      isFlatScratchBaseLegal(N0, FlatVariant)) {
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {

      OffsetVal = COffsetVal;

      std::tie(OffsetVal, RemainderOffset) =
          TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

          getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);

      if (Addr.getValueType().getSizeInBits() == 32) {

        unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;

          AddOp = AMDGPU::V_ADD_U32_e64;

            getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

            {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

            AMDGPU::V_ADDC_U32_e64, DL, VTs,

  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,

  SDValue ExtSrc = Op.getOperand(0);

bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,

  int64_t ImmOffset = 0;

  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();

      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {

        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(

        if (isUInt<32>(RemainderOffset)) {

              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
          !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
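      // Count how many of the two 32-bit halves of the offset would need a
      // literal constant; this appears to feed the constant-bus / literal
      // limit check that decides whether splitting the offset is profitable.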
    if (!LHS->isDivergent()) {

  if (!SAddr && !RHS->isDivergent()) {

      isa<ConstantSDNode>(Addr))

  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {

             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {

    auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));

                                          FI->getValueType(0));

  if (Addr->isDivergent())

  int64_t COffsetVal = 0;

      isFlatScratchBaseLegal(Addr.getOperand(0))) {
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);

      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(

      COffsetVal = SplitImmOffset;

              ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
              : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);

bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(

  return (VMax & 3) + (SMax & 3) >= 4;
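  // The SVS swizzle bug concerns a carry out of the low two bits of the VGPR
  // and SGPR addends in the scratch address computation; using the known-bits
  // maxima, the pattern is rejected whenever those low-bit contributions
  // could sum to 4 or more.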
  int64_t ImmOffset = 0;

  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();

      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {

      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset)

      if (isUInt<32>(RemainderOffset)) {

            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

        if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))

        if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))

  if (!LHS->isDivergent() && RHS->isDivergent()) {

  } else if (!RHS->isDivergent() && LHS->isDivergent()) {

  if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))

  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue *SOffset, SDValue *Offset,
                                          bool Imm32Only, bool IsBuffer) const {

         "Cannot match both soffset and offset at the same time!");

    *SOffset = ByteOffsetNode;

  SDLoc SL(ByteOffsetNode);

  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
  std::optional<int64_t> EncodedOffset =
      AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
  if (EncodedOffset && Offset && !Imm32Only) {

  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
  if (EncodedOffset && Offset && Imm32Only) {

  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))

  unsigned AddrHiVal = Info->get32BitAddressHighBits();

                                           bool IsBuffer) const {

    assert(!Imm32Only && !IsBuffer);

    return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
           SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
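    // Nested add: peel the immediate offset off the outer add first, then
    // match what remains as sbase + soffset, so an address of the form
    // (base + reg) + imm can use both the register and the immediate offset
    // fields of the SMEM instruction.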
      !Addr->getFlags().hasNoUnsignedWrap())

    N0 = Addr.getOperand(0);
    N1 = Addr.getOperand(1);

  assert(N0 && N1 && isa<ConstantSDNode>(N1));

  if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {

  if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {

                                 bool Imm32Only) const {
  if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
    SBase = Expand32BitAddress(SBase);

  return SelectSMRD(Addr, SBase, nullptr, &Offset);

  return SelectSMRD(Addr, SBase, nullptr, &Offset,

  return SelectSMRD(Addr, SBase, &SOffset, nullptr);

  return SelectSMRD(Addr, SBase, &SOffset, &Offset);

  return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,

  return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,

         SelectSMRDBaseOffset(N, SOffset, nullptr,

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,

  if (isa<ConstantSDNode>(Index))
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,

    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

    unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
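    // The VALU V_BFE form takes the offset and width as separate operands,
    // whereas the scalar S_BFE presumably packs them into a single source
    // immediate (offset in the low bits, width shifted up to bit 16), which
    // is why the two paths build their operand lists differently.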
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {

  const SDValue &Shl = N->getOperand(0);

    if (0 < BVal && BVal <= CVal && CVal < 32) {

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {

    if (N->getOperand(0).getOpcode() == ISD::SRL) {

      const SDValue &Srl = N->getOperand(0);

      if (Shift && Mask) {

    if (N->getOperand(0).getOpcode() == ISD::AND) {

      if (Shift && Mask) {

    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);

    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);

  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {

  if (!N->hasOneUse())

    MVT VT = Cond.getOperand(0).getSimpleValueType();

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {

  if (Cond.isUndef()) {

                         N->getOperand(2), N->getOperand(0));

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();

                                       : AMDGPU::S_AND_B64,
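  // Uniform conditions can branch on SCC directly; divergent ones are moved
  // into VCC (ANDed with EXEC via S_AND_B32/B64 so that inactive lanes cannot
  // influence the branch) and selected as S_CBRANCH_VCCNZ.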
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {

  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);

    N = glueCopyToM0(N, Ptr);

    N->getOperand(N->getNumOperands() - 1)

void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
  unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                   N->getOperand(5), N->getOperand(0)};

  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);

  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);

  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {

    ImmOffset = ConstOffset->getZExtValue();

  glueCopyToM0(N, SDValue(M0Base, 0));
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {

    SelectDSAppendConsume(N, IntrID);

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    SelectDSBvhStackIntrinsic(N);

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

  case Intrinsic::amdgcn_wqm:
    Opcode = AMDGPU::WQM;
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods,
                                            bool AllowAbs) const {

    Src = Src.getOperand(0);

  if (AllowAbs && Src.getOpcode() == ISD::FABS) {

    Src = Src.getOperand(0);

  if (SelectVOP3ModsImpl(In, Src, Mods)) {

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,

  if (SelectVOP3ModsImpl(In, Src, Mods, false)) {

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,

  if (SelectVOP3ModsImpl(In, Src, Mods, false)) {

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,

  return SelectVINTERPModsImpl(In, Src, SrcMods, false);

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,

  return SelectVINTERPModsImpl(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,

  return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,

  return SelectVOP3BMods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
    Src = Src.getOperand(0);

  unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

      Lo = stripBitcast(Lo.getOperand(0));

      Hi = stripBitcast(Hi.getOperand(0));

    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo.getValueSizeInBits() > VecSize) {

          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),

    if (Hi.getValueSizeInBits() > VecSize) {

          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),

    assert(Lo.getValueSizeInBits() <= VecSize &&
           Hi.getValueSizeInBits() <= VecSize);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {

      if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {

        assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);

                                       Lo.getValueType()), 0);
        auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                    : AMDGPU::SReg_64RegClassID;

                                       Src.getValueType(), Ops), 0);

    if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
      uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                         .bitcastToAPInt().getZExtValue();
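      // When both halves of a 64-bit packed source are the same FP constant,
      // the pair can presumably be fed as a single replicated 64-bit literal
      // (if it is inlinable) instead of building the vector from two copies.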
bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,

  return SelectVOP3PMods(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {

  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

  unsigned SrcSign = C->getAPIntValue().getZExtValue();

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,

  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

  unsigned SrcVal = C->getAPIntValue().getZExtValue();

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,

  return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {

  SelectVOP3ModsImpl(In, Src, Mods);

    Src = Src.getOperand(0);

    Src = stripBitcast(Src);

    SelectVOP3ModsImpl(Src, Src, ModsTmp);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,

  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,

  SelectVOP3PMadMixModsImpl(In, Src, Mods);

      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {

  bool AllUsesAcceptSReg = true;

       Limit < 10 && U != E; ++U, ++Limit) {

    if (RC != &AMDGPU::VS_32RegClass) {
      AllUsesAcceptSReg = false;

      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();

          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();

            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();

            if (CommutedRC == &AMDGPU::VS_32RegClass)
              AllUsesAcceptSReg = true;

    if (!AllUsesAcceptSReg)

  return !AllUsesAcceptSReg && (Limit < 10);
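  // Treat the immediate as a VGPR candidate only when at least one use cannot
  // take it in an SGPR (even after trying to commute the operand) and the
  // bounded scan of at most 10 uses actually finished; if the limit was hit,
  // conservatively keep the SGPR form.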
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  auto Ld = cast<LoadSDNode>(N);

  return Ld->getAlign() >= Align(4) &&

              ->isMemOpHasNoClobberedMemOperand(N)));

  bool IsModified = false;

      SDNode *Node = &*Position++;

      if (ResNode != Node) {

  } while (IsModified);