#include "llvm/IR/IntrinsicsAMDGPU.h"
#ifdef EXPENSIVE_CHECKS
#define DEBUG_TYPE "isel"
    In = stripBitcast(In);
    Out = In.getOperand(0);
      if (ShiftAmt->getZExtValue() == 16) {
  if (Idx->isZero() && In.getValueSizeInBits() <= 32)
    return In.getOperand(0);
  if (Src.getValueType().getSizeInBits() == 32)
    return stripBitcast(Src);
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                      false)
#ifdef EXPENSIVE_CHECKS
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                    false)
                                       CodeGenOpt::Level OptLevel) {
  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  assert(L->isLCSSAForm(DT));
  return SelectionDAGISel::runOnMachineFunction(MF);
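// Returns true when the given opcode produces an f16 result that is known to
// write zeros into the high 16 bits of its 32-bit destination register.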
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
#ifdef EXPENSIVE_CHECKS
  SelectionDAGISel::getAnalysisUsage(AU);
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
  bool MadeChange = false;
    switch (N->getOpcode()) {
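// A source is treated as nan-free when the producing node carries the
// no-NaNs fast-math flag.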
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (N->getFlags().hasNoNaNs())
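// Returns true if the constant node N fits in a hardware inline immediate.
// With Negated set, the negated value is tested instead; both integer and FP
// constants are checked via their raw bit patterns.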
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
      return TII->isInlineConstant(-C->getAPIntValue());
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
    return TII->isInlineConstant(C->getAPIntValue());
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
                                                                   unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
          = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
  switch (N->getMachineOpcode()) {
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
  Ops.push_back(NewChain);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));
  return glueCopyToOp(N, M0, M0.getValue(1));
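// If N accesses LDS (local or region address space), glue a copy that
// initializes M0 before the memory operation; other address spaces are left
// untouched.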
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  EVT VT = N->getValueType(0);
  if (NumVectorElts == 1) {
  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    if (isa<RegisterSDNode>(N->getOperand(i))) {
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
  if (NOps != NumVectorElts) {
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
  N = glueCopyToM0LDSInit(N);
    SelectADD_SUB_I64(N);
    SelectUADDO_USUBO(N);
    SelectFMUL_W_CHAIN(N);
    SelectFMA_W_CHAIN(N);
    EVT VT = N->getValueType(0);
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    if (N->getValueType(0) == MVT::i128) {
    } else if (N->getValueType(0) == MVT::i64) {
      const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                              N->getOperand(1), SubReg1 };
                                            N->getValueType(0), Ops));
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
      Imm = C->getZExtValue();
    return SelectMUL_LOHI(N);
                           { N->getOperand(0), N->getOperand(1) });
    SelectINTRINSIC_W_CHAIN(N);
    SelectINTRINSIC_WO_CHAIN(N);
    SelectINTRINSIC_VOID(N);
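// A branch is considered uniform when its terminator carries the
// "amdgpu.uniform" or "structurizecfg.uniform" metadata.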
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  if (RHS.countTrailingOnes() >= ShAmtBits)
    N1 = Lo.getOperand(1);
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
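// 64-bit add/sub is lowered as a 32-bit low half plus a carry-consuming high
// half; OpcMap below picks SALU or VALU opcodes depending on whether the node
// is divergent.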
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
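// ADDCARRY/SUBCARRY with an incoming carry: divergent nodes use the VALU
// carry-in instructions, uniform nodes use the scalar carry pseudos.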
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
        N, Opc, N->getVTList(),
         CurDAG->getTargetConstant(0, {}, MVT::i1) });
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
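// UADDO/USUBO with a used carry-out: divergent nodes select the VALU
// V_ADD_CO/V_SUB_CO forms, uniform nodes the scalar S_UADDO/S_USUBO pseudos.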
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  bool IsVALU = N->isDivergent();
    if (UI.getUse().getResNo() == 1) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) });
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
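// Ops index layout for the VOP3 operand list built below:
// [0]=src0_mods, [1]=src0, [2]=src1_mods, [3]=src1, [4]=src2_mods, [5]=src2,
// [6] and [7] from SelectVOP3Mods0, [8] and [9] the incoming chain and glue.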
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);
                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);
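// V_DIV_SCALE uses the VOP3b form; Ops pairs below are (mods, src) for each
// of the three sources, with the SelectVOP3BMods0 call also filling the two
// trailing operands.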
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  EVT VT = N->getValueType(0);
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
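// i64 mad of two i32 sources plus an i64 addend maps directly onto
// V_MAD_I64_I32 / V_MAD_U64_U32.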
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
    int64_t ByteOffset = C->getSExtValue();
    if (isDSOffsetLegal(SDValue(), ByteOffset)) {
                                      Zero, Addr.getOperand(1));
      if (isDSOffsetLegal(Sub, ByteOffset)) {
        Opnds.push_back(Zero);
        Opnds.push_back(Addr.getOperand(1));
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;
    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
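// For DS read2/write2 the two byte offsets must both be multiples of the
// element size so they can be encoded as per-element offsets.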
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
                                              unsigned Size) const {
      unsigned OffsetValue0 = C1->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;
      if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
                   dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;
      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
            SubOp = AMDGPU::V_SUB_U32_e64;
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      C1 = cast<ConstantSDNode>(Addr.getOperand(1));
      N0 = Addr.getOperand(0);
      if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
  SDValue Ptr, Offen, Idxen, Addr64;
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    if (C->getSExtValue()) {
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  auto *FI = dyn_cast<FrameIndexSDNode>(N);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
    if (Imm != NullPtr) {
                               AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
      TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
  return RC && TRI.isSGPRClass(RC);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
        APInt::getAllOnes(32).getZExtValue();
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N, 0)).getNode();
  assert(isa<BuildVectorSDNode>(N));
          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
  int64_t OffsetVal = 0;
  bool CanHaveFlatSegmentOffsetBug =
  if (isBaseWithConstantOffset64(Addr, N0, N1)) {
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
      OffsetVal = COffsetVal;
      std::tie(OffsetVal, RemainderOffset) =
          TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
          getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
      if (Addr.getValueType().getSizeInBits() == 32) {
        Opnds.push_back(N0);
        Opnds.push_back(AddOffsetLo);
        unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          AddOp = AMDGPU::V_ADD_U32_e64;
          Opnds.push_back(Clamp);
            getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
                                   AMDGPU::V_ADDC_U32_e64, DL, VTs,
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
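// SADDR form of global accesses: split a base + constant offset address into
// a uniform scalar base (SAddr), a 32-bit VGPR offset, and an immediate
// offset that fits the instruction encoding.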
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
  int64_t ImmOffset = 0;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
    unsigned NumLiterals =
        !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
        !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
    if (!LHS->isDivergent()) {
    if (!SAddr && !RHS->isDivergent()) {
      isa<ConstantSDNode>(Addr))
  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
    auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
                                      FI->getValueType(0));
  if (Addr->isDivergent())
  int64_t COffsetVal = 0;
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
    COffsetVal = SplitImmOffset;
            ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
  int64_t ImmOffset = 0;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset)
          AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
  if (!LHS->isDivergent() && RHS->isDivergent()) {
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
  SDLoc SL(ByteOffsetNode);
  int64_t ByteOffset = C->getSExtValue();
    if (EncodedOffset) {
    if (EncodedOffset) {
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
        Addr->getFlags().hasNoUnsignedWrap())) {
    N0 = Addr.getOperand(0);
    N1 = Addr.getOperand(1);
    assert(N0 && N1 && isa<ConstantSDNode>(N1));
    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
  SBase = Expand32BitAddress(Addr);
  return !Imm && isa<ConstantSDNode>(Offset);
         !isa<ConstantSDNode>(Offset);
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                         C->getZExtValue())) {
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
  if (isa<ConstantSDNode>(Index))
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
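// Match (sra/srl (shl x, B), C) with 0 < B <= C < 32 as a signed/unsigned
// bitfield extract of width 32 - C starting at bit C - B.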
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  const SDValue &Shl = N->getOperand(0);
    if (0 < BVal && BVal <= CVal && CVal < 32) {
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      const SDValue &Srl = N->getOperand(0);
    if (N->getOperand(0).getOpcode() == ISD::AND) {
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
      unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  if (!N->hasOneUse())
  MVT VT = Cond.getOperand(0).getSimpleValueType();
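// Uniform branches whose condition can live in SCC use S_CBRANCH_SCC1;
// otherwise the condition is ANDed with EXEC and branched on via VCC.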
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  if (Cond.isUndef()) {
                         N->getOperand(2), N->getOperand(0));
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
                                       : AMDGPU::S_AND_B64,
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  unsigned Src0Mods, Src1Mods, Src2Mods;
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
  assert((IsFMA || !Mode.allFP32Denormals()) &&
         "fmad selected with denormals enabled");
  if (Sel0 || Sel1 || Sel2) {
        IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
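// ds_append/ds_consume address LDS through M0, so the pointer (plus any
// DS-legal constant offset) is copied into M0 instead of being passed as a
// regular operand.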
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);
    N = glueCopyToM0(N, Ptr);
    N->getOperand(N->getNumOperands() - 1)
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
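// GWS (global wave sync) operations pass their base offset through M0;
// constant offsets are folded into the instruction's immediate offset field
// when possible.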
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
      glueCopyToM0(N, SDValue(M0Base, 0));
    Ops.push_back(N->getOperand(2));
  Ops.push_back(OffsetField);
  Ops.push_back(Chain);
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    SelectDSAppendConsume(N, IntrID);
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  case Intrinsic::amdgcn_wqm:
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);
                                          bool AllowAbs) const {
    Src = Src.getOperand(0);
  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    Src = Src.getOperand(0);
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
  if (SelectVOP3ModsImpl(In, Src, Mods, false)) {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  return SelectVOP3Mods(In, Src, SrcMods);
  return SelectVOP3BMods(In, Src, SrcMods);
    Src = Src.getOperand(0);
  unsigned VecMods = Mods;
    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));
      Lo = stripBitcast(Lo.getOperand(0));
      Hi = stripBitcast(Hi.getOperand(0));
    if (isExtractHiElt(Lo, Lo))
    if (isExtractHiElt(Hi, Hi))
    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);
    if (Lo.getValueSizeInBits() > VecSize) {
    if (Hi.getValueSizeInBits() > VecSize) {
    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
                                               Lo.getValueType()), 0);
      auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                  : AMDGPU::SReg_64RegClassID;
                                         Src.getValueType(), Ops), 0);
    if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
      uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                         .bitcastToAPInt().getZExtValue();
  return SelectVOP3PMods(In, Src, SrcMods, true);
  return SelectVOP3Mods(In, Src, SrcMods);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  SelectVOP3ModsImpl(In, Src, Mods);
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);
      SelectVOP3ModsImpl(Src, Src, ModsTmp);
    if (isExtractHiElt(Src, Src)) {
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
          C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
  if (isExtractHiElt(In, Src))
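// Returns true if an immediate is better materialized in a VGPR, i.e. not all
// of its users (checked over at most 10 uses) can accept an SGPR operand.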
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  bool AllUsesAcceptSReg = true;
       Limit < 10 && U != E; ++U, ++Limit) {
    if (RC != &AMDGPU::VS_32RegClass) {
      AllUsesAcceptSReg = false;
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            if (CommutedRC == &AMDGPU::VS_32RegClass)
              AllUsesAcceptSReg = true;
    if (!AllUsesAcceptSReg)
  return !AllUsesAcceptSReg && (Limit < 10);
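// A load qualifies as a uniform (scalar) load if it is at least 4-byte
// aligned and its address is not divergent.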
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  auto Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 4 &&
          !N->isDivergent() &&
    bool IsModified = false;
      SDNode *Node = &*Position++;
      if (ResNode != Node) {
  } while (IsModified);