#include "llvm/IR/IntrinsicsAMDGPU.h"
#ifdef EXPENSIVE_CHECKS
#define DEBUG_TYPE "amdgpu-isel"

  In = stripBitcast(In);
  Out = In.getOperand(0);
  if (ShiftAmt->getZExtValue() == 16) {
  return In.getOperand(0);
  if (Src.getValueType().getSizeInBits() == 32)
    return stripBitcast(Src);
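// The fragments above belong to small matching helpers (stripBitcast and the
// hi/lo half-element matchers). Illustrative standalone sketch only, with a
// hypothetical helper name, not the selector itself: extracting the high half
// of a packed 32-bit value is a 16-bit right shift, which is why the matcher
// above tests for a shift amount of 16.
#include <cstdint>
static inline uint16_t hiHalf(uint32_t Packed) {
  return static_cast<uint16_t>(Packed >> 16); // high v2i16/v2f16 lane
}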
100 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
105#ifdef EXPENSIVE_CHECKS
110 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
131bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
195#ifdef EXPENSIVE_CHECKS
196 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
197 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
199 assert(L->isLCSSAForm(DT));
208#ifdef EXPENSIVE_CHECKS
217 MVT VT =
N->getValueType(0).getSimpleVT();
218 if (VT != MVT::v2i16 && VT != MVT::v2f16)
224 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(
Hi));
261 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(
Lo));
262 if (LdLo &&
Lo.hasOneUse()) {
301 bool MadeChange =
false;
307 switch (
N->getOpcode()) {
324bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
330 return TII->isInlineConstant(
C->getAPIntValue());
333 return TII->isInlineConstant(
C->getValueAPF());
343 unsigned OpNo)
const {
344 if (!
N->isMachineOpcode()) {
346 Register Reg = cast<RegisterSDNode>(
N->getOperand(1))->getReg();
347 if (Reg.isVirtual()) {
349 return MRI.getRegClass(Reg);
353 return TRI->getPhysRegBaseClass(Reg);
359 switch (
N->getMachineOpcode()) {
363 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
364 if (OpIdx >=
Desc.getNumOperands())
366 int RegClass =
Desc.operands()[OpIdx].RegClass;
372 case AMDGPU::REG_SEQUENCE: {
373 unsigned RCID =
N->getConstantOperandVal(0);
377 SDValue SubRegOp =
N->getOperand(OpNo + 1);
387 SmallVector <SDValue, 8> Ops;
389 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
400 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
403 return glueCopyToOp(
N,
M0,
M0.getValue(1));
406SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
407 unsigned AS = cast<MemSDNode>(
N)->getAddressSpace();
424 AMDGPU::S_MOV_B32,
DL, MVT::i32,
427 AMDGPU::S_MOV_B32,
DL, MVT::i32,
438 EVT VT =
N->getValueType(0);
444 if (NumVectorElts == 1) {
450 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
460 bool IsRegSeq =
true;
461 unsigned NOps =
N->getNumOperands();
462 for (
unsigned i = 0; i < NOps; i++) {
464 if (isa<RegisterSDNode>(
N->getOperand(i))) {
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
  if (NOps != NumVectorElts) {
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =

  EVT VT = N->getValueType(0);
  auto *SVN = cast<ShuffleVectorSDNode>(N);
         Mask[0] < 4 && Mask[1] < 4);
  SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
  SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
  unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
  unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
    Src0SubReg = Src1SubReg;
    Src1SubReg = Src0SubReg;
  if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
      Src1SubReg == AMDGPU::sub0) {
      {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,

  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
  N = glueCopyToM0LDSInit(N);
    if (N->getValueType(0) != MVT::i64)
    SelectADD_SUB_I64(N);
    if (N->getValueType(0) != MVT::i32)
    SelectUADDO_USUBO(N);
    SelectFMUL_W_CHAIN(N);
    SelectFMA_W_CHAIN(N);
    EVT VT = N->getValueType(0);
    unsigned RegClassID =
    if (N->getValueType(0) == MVT::i128) {
    } else if (N->getValueType(0) == MVT::i64) {
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
                                  N->getValueType(0), Ops));
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
      Imm = C->getZExtValue();
    return SelectMUL_LOHI(N);
    if (N->getValueType(0) != MVT::i32)
    if (N->getValueType(0) == MVT::i32) {
                           { N->getOperand(0), N->getOperand(1) });
    SelectINTRINSIC_W_CHAIN(N);
    SelectINTRINSIC_WO_CHAIN(N);
    SelectINTRINSIC_VOID(N);
    SelectWAVE_ADDRESS(N);
    SelectSTACKRESTORE(N);

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  const APInt &RHS = N->getConstantOperandAPInt(1);
  if (RHS.countr_one() >= ShAmtBits)
  return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
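// Worked example for the check above (illustrative standalone sketch, not the
// selector): a 32-bit shift only reads the low five bits of its amount, so an
// explicit "Amt & 31" adds nothing once countr_one(31) == 5 >= ShAmtBits, and
// the mask can be dropped.
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t Amt = 0; Amt < 32; ++Amt)
    assert((Amt & 31) == Amt); // masking the amount of a 32-bit shift is a no-op
}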
  N1 = Lo.getOperand(1);
  assert(LHS && RHS && isa<ConstantSDNode>(RHS));
  return "AMDGPU DAG->DAG Pattern Instruction Selection";

#ifdef EXPENSIVE_CHECKS
  for (auto &L : LI.getLoopsInPreorder())
    assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
      (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
      (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
      AMDGPU::S_MOV_B32, DL, MVT::i32,

void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  unsigned Opcode = N->getOpcode();
                                       DL, MVT::i32, LHS, Sub0);
                                       DL, MVT::i32, LHS, Sub1);
                                       DL, MVT::i32, RHS, Sub0);
                                       DL, MVT::i32, RHS, Sub1);
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
                                  MVT::i64, RegSequenceArgs);
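// Sketch of the decomposition SelectADD_SUB_I64 performs (illustrative
// standalone code with a hypothetical helper name, not the selector itself):
// the low halves are added with a carry-out, the high halves are added with
// that carry, and the two 32-bit results are reassembled into an i64 (the
// REG_SEQUENCE above).
#include <cstdint>
static uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
  uint32_t Lo = ALo + BLo;               // S_ADD_U32 / V_ADD_CO_U32
  uint32_t Carry = Lo < ALo ? 1u : 0u;   // carry-out of the low half
  uint32_t Hi = AHi + BHi + Carry;       // S_ADDC_U32 / V_ADDC_U32
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}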
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  if (N->isDivergent()) {
                            : AMDGPU::V_SUBB_U32_e64;
        N, Opc, N->getVTList(),
         CurDAG->getTargetConstant(0, {}, MVT::i1) });
                            : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  bool IsVALU = N->isDivergent();
    if (UI.getUse().getResNo() == 1) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) });
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);
                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  EVT VT = N->getValueType(0);
  assert(VT == MVT::f32 || VT == MVT::f64);
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);

void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),

void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
                                    MVT::i32, SDValue(Mad, 0), Sub0);
                                    MVT::i32, SDValue(Mad, 0), Sub1);
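// Sketch of what selecting MUL_LOHI as a MAD with a zero accumulator computes
// (illustrative standalone code, hypothetical helper name): mad_u64_u32(a, b, 0)
// is the full 64-bit product, and the lo/hi results are the sub0/sub1 halves
// extracted above.
#include <cstdint>
static void mulLoHi(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Prod = static_cast<uint64_t>(A) * B + 0; // V_MAD_U64_U32 with C = 0
  Lo = static_cast<uint32_t>(Prod);                 // sub0
  Hi = static_cast<uint32_t>(Prod >> 32);           // sub1
}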
  int64_t ByteOffset = C->getSExtValue();
  if (isDSOffsetLegal(SDValue(), ByteOffset)) {
                                      Zero, Addr.getOperand(1));
      if (isDSOffsetLegal(Sub, ByteOffset)) {
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;
    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
                                            DL, MVT::i32, Zero);

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
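// Worked example of the legality check above (illustrative standalone sketch,
// hypothetical helper name): ds_read2/write2 encode two 8-bit offsets counted
// in units of the element size, so each byte offset must be a multiple of Size
// and fit in 8 bits after division. With Size = 4, byte offsets 0 and 1020
// encode as 0 and 255 (legal); 1024 would need 256 and no longer fits.
#include <cstdint>
static bool isDSOffset2LegalSketch(unsigned Offset0, unsigned Offset1,
                                   unsigned Size) {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  return Offset0 / Size <= 255 && Offset1 / Size <= 255; // isUInt<8>
}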
      Addr->getFlags().hasNoUnsignedWrap()) ||

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
  auto LHS = Addr.getOperand(0);
  auto RHS = Addr.getOperand(1);
  if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
  auto LHS = Addr.getOperand(0);
  auto RHS = Addr.getOperand(1);

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
  auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
       (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
  auto LHS = Base.getOperand(0);
  auto RHS = Base.getOperand(1);

  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);

                                              unsigned Size) const {
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
                 dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
    unsigned OffsetValue0 = C->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
      N0 = Addr.getOperand(0);
      AMDGPU::S_MOV_B32, DL, MVT::i32,
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    if (C->getSExtValue()) {

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  auto *FI = dyn_cast<FrameIndexSDNode>(N);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
  int64_t Imm = CAddr->getSExtValue();
  const int64_t NullPtr =
  if (Imm != NullPtr) {
        AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);
    if (TII->isLegalMUBUFImmOffset(C1) &&
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);

  auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
  if (!Reg.isPhysical())
  const auto *RC = TRI.getPhysRegBaseClass(Reg);
  return RC && TRI.isSGPRClass(RC);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
        maskTrailingOnes<uint64_t>(32);
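// For reference (illustrative standalone sketch, hypothetical helper name):
// maskTrailingOnes<uint64_t>(32) is the value with only the low 32 bits set,
// 0xFFFFFFFF, so the expression above clamps the combined offset to an
// unsigned 32-bit quantity.
#include <cstdint>
static constexpr uint64_t maskTrailingOnesSketch(unsigned N) {
  return N >= 64 ? ~0ull : (1ull << N) - 1;
}
static_assert(maskTrailingOnesSketch(32) == 0xFFFFFFFFull, "low 32 bits set");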
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
  SOffset = ByteOffsetNode;

  assert(isa<BuildVectorSDNode>(N));

  int64_t OffsetVal = 0;
  bool CanHaveFlatSegmentOffsetBug =
  if (isBaseWithConstantOffset64(Addr, N0, N1) &&
       isFlatScratchBaseLegal(Addr))) {
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
      OffsetVal = COffsetVal;
      std::tie(OffsetVal, RemainderOffset) =
          TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
          getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
      if (Addr.getValueType().getSizeInBits() == 32) {
        unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          AddOp = AMDGPU::V_ADD_U32_e64;
                                       DL, MVT::i32, N0, Sub0);
                                       DL, MVT::i32, N0, Sub1);
            getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
            {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
            AMDGPU::V_ADDC_U32_e64, DL, VTs,
                                   MVT::i64, RegSequenceArgs),

  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,

bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
  int64_t ImmOffset = 0;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        if (isUInt<32>(RemainderOffset)) {
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
  unsigned NumLiterals =
  if (!LHS->isDivergent()) {
  if (!SAddr && !RHS->isDivergent()) {
      isa<ConstantSDNode>(Addr))

  if (auto *FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
    auto *FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
                                          FI->getValueType(0));

  if (Addr->isDivergent())
  int64_t COffsetVal = 0;
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
    COffsetVal = SplitImmOffset;
            ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
            : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);

bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
  return (VMax & 3) + (SMax & 3) >= 4;
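// Sketch of the check above (the "swizzle bug" interpretation is an
// assumption; the arithmetic is copied from the source): the hazard is flagged
// when the low two bits of the largest possible VGPR and SGPR address
// contributions can carry past a 4-byte boundary, e.g. VMax = 7, SMax = 1
// gives (7 & 3) + (1 & 3) = 4 and is rejected, while VMax = 4, SMax = 3 gives
// 0 + 3 = 3 and is allowed.
#include <cstdint>
static bool svsSwizzleHazard(uint64_t VMax, uint64_t SMax) {
  return (VMax & 3) + (SMax & 3) >= 4; // same arithmetic as the check above
}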
  int64_t ImmOffset = 0;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset)
      if (isUInt<32>(RemainderOffset)) {
            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

  if (!isFlatScratchBaseLegal(Addr))
  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
  if (!LHS->isDivergent() && RHS->isDivergent()) {
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
    if (!isFlatScratchBaseLegalSV(OrigAddr))
  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))

bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     int64_t ImmOffset) const {
  if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          bool Imm32Only, bool IsBuffer,
                                          int64_t ImmOffset) const {
         "Cannot match both soffset and offset at the same time!");
    *SOffset = ByteOffsetNode;
    return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
    return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
  SDLoc SL(ByteOffsetNode);
  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
      *Subtarget, ByteOffset, IsBuffer, HasSOffset);
  if (EncodedOffset && Offset && !Imm32Only) {
  if (EncodedOffset && Offset && Imm32Only) {
  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))

  if (Addr.getValueType() != MVT::i32)
  unsigned AddrHiVal = Info->get32BitAddressHighBits();

                                              bool Imm32Only, bool IsBuffer,
                                              int64_t ImmOffset) const {
    assert(!Imm32Only && !IsBuffer);
    if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
      ImmOff = C->getSExtValue();
    return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
      !Addr->getFlags().hasNoUnsignedWrap())
  N0 = Addr.getOperand(0);
  N1 = Addr.getOperand(1);
  assert(N0 && N1 && isa<ConstantSDNode>(N1));
  if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
  if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,

                                bool Imm32Only) const {
  if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
    SBase = Expand32BitAddress(SBase);
  if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {

  return SelectSMRD(Addr, SBase, nullptr, &Offset);
  return SelectSMRD(Addr, SBase, nullptr, &Offset,
  return SelectSMRD(Addr, SBase, &SOffset, nullptr);
  return SelectSMRD(Addr, SBase, &SOffset, &Offset);
  return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
  return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
  return N.getValueType() == MVT::i32 &&
         SelectSMRDBaseOffset(N, SOffset, nullptr,

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
  if (isa<ConstantSDNode>(Index))
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  const SDValue &Shl = N->getOperand(0);
    if (0 < BVal && BVal <= CVal && CVal < 32) {

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      const SDValue &Srl = N->getOperand(0);
      if (Shift && Mask) {
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      if (Shift && Mask) {
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  if (!N->hasOneUse())
  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i64) {

  auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
  auto Cond = VCMP.getOperand(0);

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  if (Cond.isUndef()) {
                     N->getOperand(2), N->getOperand(0));
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  bool AndExec = !UseSCCBr;
  bool Negate = false;
    auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
    bool NegatedBallot = false;
      UseSCCBr = !BallotCond->isDivergent();
      Negate = Negate ^ NegatedBallot;
      UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
               : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
            Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,

void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
      !N->isDivergent()) {
    if (Src.getValueType() == MVT::f16) {

void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);
    N = glueCopyToM0(N, Ptr);
    N->getOperand(N->getNumOperands() - 1)

void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
  unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                   N->getOperand(5), N->getOperand(0)};
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (!Subtarget->hasGWS() ||
      (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
    ImmOffset = ConstOffset->getZExtValue();
  glueCopyToM0(N, SDValue(M0Base, 0));

void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    if (N->getValueType(0) != MVT::i32)
    SelectDSAppendConsume(N, IntrID);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    SelectDSBvhStackIntrinsic(N);
  case Intrinsic::amdgcn_init_whole_wave:
        ->setInitWholeWave();

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(0);
  unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
  SDNode *ConvGlueNode = N->getGluedNode();
        MVT::Glue, SDValue(ConvGlueNode, 0));
    ConvGlueNode = nullptr;
  case Intrinsic::amdgcn_wqm:
    Opcode = AMDGPU::WQM;
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap: {
    if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
        (IntrID == Intrinsic::amdgcn_permlane32_swap &&
    Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
      NewOps.push_back(SDValue(ConvGlueNode, 0));
    bool FI = N->getConstantOperandVal(3);
  if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
      NewOps.push_back(SDValue(ConvGlueNode, 0));

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);

void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
                       {N->getOperand(0), Log2WaveSize});

void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
  if (N->isDivergent()) {
                        {SrcVal, Log2WaveSize}),

bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
    Src = Src.getOperand(0);
  } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
    auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
    if (LHS && LHS->isZero()) {
      Src = Src.getOperand(1);
  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    Src = Src.getOperand(0);

  if (SelectVOP3ModsImpl(In, Src, Mods, true,

bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
  if (SelectVOP3ModsImpl(In, Src, Mods, false,

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, false);

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
  return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
  return SelectVOP3BMods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
    Src = Src.getOperand(0);
  unsigned VecMods = Mods;
    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));
      Lo = stripBitcast(Lo.getOperand(0));
      Hi = stripBitcast(Hi.getOperand(0));
    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);
    if (Lo.getValueSizeInBits() > VecSize) {
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    if (Hi.getValueSizeInBits() > VecSize) {
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    assert(Lo.getValueSizeInBits() <= VecSize &&
           Hi.getValueSizeInBits() <= VecSize);
    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
        assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
                                        Lo.getValueType()), 0);
        auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                    : AMDGPU::SReg_64RegClassID;
                                      Src.getValueType(), Ops), 0);
    if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
      uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                         .bitcastToAPInt().getZExtValue();
      Src.getNumOperands() == 2) {
    auto *SVN = cast<ShuffleVectorSDNode>(Src);
    if (Mask[0] < 2 && Mask[1] < 2) {

bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
  return SelectVOP3PMods(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  unsigned SrcSign = C->getZExtValue();

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  unsigned SrcVal = C->getZExtValue();

  unsigned DstRegClass;
  switch (Elts.size()) {
    DstRegClass = AMDGPU::VReg_256RegClassID;
    DstRegClass = AMDGPU::VReg_128RegClassID;
    DstRegClass = AMDGPU::VReg_64RegClassID;
  for (unsigned i = 0; i < Elts.size(); ++i) {

  assert("unhandled Reg sequence size" &&
         (Elts.size() == 8 || Elts.size() == 16));
  for (unsigned i = 0; i < Elts.size(); i += 2) {
    SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
        {Elts[i + 1], Elts[i], PackLoLo});

                                     const SDLoc &DL, unsigned ElementSize) {
  if (ElementSize == 16)
  if (ElementSize == 32)

                                   unsigned ElementSize) {
  for (auto El : Elts) {
      NegAbsElts.push_back(El->getOperand(0));
  if (Elts.size() != NegAbsElts.size()) {

                                          std::function<bool(SDValue)> ModifierCheck) {
            dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
      for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
        SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
        if (!ModifierCheck(ElF16))

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    if (EltsF16.empty())
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    if (EltsV2F16.empty())

bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    unsigned ModOpcode =
bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
  if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
    if (isInlineImmediate(Splat.getNode())) {
      unsigned Imm = C->getAPIntValue().getSExtValue();
      unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();

  SDValue SplatSrc32 = stripBitcast(In);
  if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
    if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
      SDValue SplatSrc16 = stripBitcast(Splat32);
      if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
          std::optional<APInt> RawValue;
            RawValue = C->getValueAPF().bitcastToAPInt();
            RawValue = C->getAPIntValue();
          if (RawValue.has_value()) {
            EVT VT = In.getValueType().getScalarType();
              if (TII->isInlineConstant(FloatVal)) {
              if (TII->isInlineConstant(RawValue.value())) {

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
  return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  SelectVOP3ModsImpl(In, Src, Mods);
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
  SelectVOP3PMadMixModsImpl(In, Src, Mods);

  unsigned NumOpcodes = 0;
  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->isAllOnes()) {
    for (unsigned I = 0; I < Src.size(); ++I) {
    if (Src.size() == 3) {
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        if (C->isAllOnes()) {
          for (unsigned I = 0; I < Src.size(); ++I) {
            if (Src[I] == LHS) {
    Bits = SrcBits[Src.size()];

  switch (In.getOpcode()) {
  if (!getOperandBits(LHS, LHSBits) ||
      !getOperandBits(RHS, RHSBits)) {
    return std::make_pair(0, 0);
    NumOpcodes += Op.first;
    LHSBits = Op.second;
    NumOpcodes += Op.first;
    RHSBits = Op.second;
    return std::make_pair(0, 0);

  switch (In.getOpcode()) {
    TTbl = LHSBits & RHSBits;
    TTbl = LHSBits | RHSBits;
    TTbl = LHSBits ^ RHSBits;
  return std::make_pair(NumOpcodes + 1, TTbl);
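// Worked example of the truth-table construction above (illustrative
// standalone sketch): each input gets a base pattern (A = 0xf0, B = 0xcc,
// C = 0xaa, the SrcBits table) and the expression tree is folded over those
// bytes. For (A & B) | C the table is (0xf0 & 0xcc) | 0xaa = 0xc0 | 0xaa =
// 0xea, which would be the 8-bit truth-table immediate for a three-input bit
// operation.
#include <cassert>
#include <cstdint>
int main() {
  const uint8_t A = 0xf0, B = 0xcc, C = 0xaa;
  uint8_t TTbl = static_cast<uint8_t>((A & B) | C);
  assert(TTbl == 0xea);
}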
  unsigned NumOpcodes;
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
  if (NumOpcodes < 2 || Src.empty())
  if (NumOpcodes < 4 && !In->isDivergent())
  if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
      (In.getOperand(0).getOpcode() == In.getOpcode() ||
       In.getOperand(1).getOpcode() == In.getOpcode()))
  while (Src.size() < 3)
    Src.push_back(Src[0]);

      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);

bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  bool AllUsesAcceptSReg = true;
       Limit < 10 && U != E; ++U, ++Limit) {
        getOperandRegClass(U->getUser(), U->getOperandNo());
    if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
      AllUsesAcceptSReg = false;
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
                getOperandRegClass(U->getUser(), CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass ||
                CommutedRC == &AMDGPU::VS_64RegClass)
              AllUsesAcceptSReg = true;
    if (!AllUsesAcceptSReg)
  return !AllUsesAcceptSReg && (Limit < 10);

bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  const auto *Ld = cast<LoadSDNode>(N);
              ->isMemOpHasNoClobberedMemOperand(N)));

  bool IsModified = false;
    SDNode *Node = &*Position++;
      if (ResNode != Node) {
  } while (IsModified);