#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "AMDGPUtti"
39 "amdgpu-unroll-threshold-private",
40 cl::desc(
"Unroll threshold for AMDGPU if private memory used in a loop"),
44 "amdgpu-unroll-threshold-local",
45 cl::desc(
"Unroll threshold for AMDGPU if local memory used in a loop"),
49 "amdgpu-unroll-threshold-if",
50 cl::desc(
"Unroll threshold increment for AMDGPU for each if statement inside loop"),
54 "amdgpu-unroll-runtime-local",
55 cl::desc(
"Allow runtime unroll for AMDGPU if local memory used in a loop"),
59 "amdgpu-unroll-max-block-to-analyze",
60 cl::desc(
"Inner loop block size threshold to analyze in unroll for AMDGPU"),
65 cl::desc(
"Cost of alloca argument"));
73 cl::desc(
"Maximum alloca size to use for inline cost"));
78 cl::desc(
"Maximum number of BBs allowed in a function after inlining"
79 " (compile time constraint)"));
83 "amdgpu-memcpy-loop-unroll",
84 cl::desc(
"Unroll factor (affecting 4x32-bit operations) to use for memory "
85 "operations when lowering statically-sized memcpy, memmove, or"
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
                              unsigned Depth = 0) {
  const Instruction *I = dyn_cast<Instruction>(Cond);
  if (!I)
    return false;

  for (const Value *V : I->operand_values()) {
    if (!L->contains(I))
      continue;
    if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
      // Only count PHIs of this loop itself, not of inner loops.
      if (llvm::none_of(L->getSubLoops(), [PHI](const Loop *SubLoop) {
            return SubLoop->contains(PHI); }))
        return true;
    } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth + 1))
      return true;
  }
  return false;
}
AMDGPUTTIImpl::AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getDataLayout()), TargetTriple(TM->getTargetTriple()),
      ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()) {}
void AMDGPUTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  const Function &F = *L->getHeader()->getParent();
  UP.Threshold =
      F.getFnAttributeAsParsedInteger("amdgpu-unroll-threshold", 300);
  UP.MaxCount = std::numeric_limits<unsigned>::max();
  // ...

  unsigned ThresholdPrivate = UnrollThresholdPrivate;
  unsigned ThresholdLocal = UnrollThresholdLocal;

  // Cap the size of private allocas for which boosted unrolling is considered.
  const unsigned MaxAlloca = (256 - 16) * 4;

  // A loop may carry an explicit unroll threshold in metadata; it overrides
  // the default and also clamps the private/local thresholds.
  if (MDNode *LoopUnrollThreshold =
          findOptionMDForLoop(L, "amdgpu.loop.unroll.threshold")) {
    if (LoopUnrollThreshold->getNumOperands() == 2) {
      ConstantInt *MetaThresholdValue = mdconst::extract_or_null<ConstantInt>(
          LoopUnrollThreshold->getOperand(1));
      if (MetaThresholdValue) {
        UP.Threshold = MetaThresholdValue->getSExtValue();
        UP.PartialThreshold = UP.Threshold;
        ThresholdPrivate = std::min(ThresholdPrivate, UP.Threshold);
        ThresholdLocal = std::min(ThresholdLocal, UP.Threshold);
      }
    }
  }

  unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
  for (const BasicBlock *BB : L->getBlocks()) {
    const DataLayout &DL = BB->getDataLayout();
    unsigned LocalGEPsSeen = 0;

    if (llvm::any_of(L->getSubLoops(), [BB](const Loop *SubLoop) {
          return SubLoop->contains(BB); }))
      continue; // Block belongs to an inner loop.

    for (const Instruction &I : *BB) {
      // Unroll a loop which contains an "if" statement whose condition is
      // defined by a PHI belonging to the loop. This may help to eliminate
      // the if region and potentially even the PHI itself, saving on both
      // divergence and registers used for the PHI. Add a small bonus for
      // each such "if" statement.
      if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
        if (UP.Threshold < MaxBoost && Br->isConditional()) {
          BasicBlock *Succ0 = Br->getSuccessor(0);
          BasicBlock *Succ1 = Br->getSuccessor(1);
          if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
              (L->contains(Succ1) && L->isLoopExiting(Succ1)))
            continue;
          if (dependsOnLocalPhi(L, Br->getCondition())) {
            UP.Threshold += UnrollThresholdIf;
            LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
                              << " for loop:\n"
                              << *L << " due to " << *Br << '\n');
            if (UP.Threshold >= MaxBoost)
              return;
          }
        }
        continue;
      }

      const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
      if (!GEP)
        continue;

      unsigned AS = GEP->getAddressSpace();
      unsigned Threshold = 0;
      if (AS == AMDGPUAS::PRIVATE_ADDRESS)
        Threshold = ThresholdPrivate;
      else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
               (AS == AMDGPUAS::REGION_ADDRESS && ST->hasGDS()))
        Threshold = ThresholdLocal;
      else
        continue;

      if (UP.Threshold >= Threshold)
        continue;

      if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
        const Value *Ptr = GEP->getPointerOperand();
        const AllocaInst *Alloca =
            dyn_cast<AllocaInst>(getUnderlyingObject(Ptr));
        if (!Alloca || !Alloca->isStaticAlloca())
          continue;
        std::optional<TypeSize> AllocaSize = Alloca->getAllocationSize(DL);
        if (!AllocaSize || AllocaSize->getFixedValue() > MaxAlloca)
          continue;
      } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
                 AS == AMDGPUAS::REGION_ADDRESS) {
        LocalGEPsSeen++;
        // Inhibit unroll for local memory if we have seen addressing not to
        // a variable; most likely we will be unable to combine it. Do not
        // unroll too deep inner loops for local memory to give a chance for
        // unrolling the outer loop.
        if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
            (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
             !isa<Argument>(GEP->getPointerOperand())))
          continue;
        LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
                          << *L << " due to LDS use.\n");
        UP.Runtime = UnrollRuntimeLocal;
      }

      // Check if GEP depends on a value defined by this loop itself.
      bool HasLoopDef = false;
      for (const Value *Op : GEP->operands()) {
        const Instruction *Inst = dyn_cast<Instruction>(Op);
        if (!Inst || L->isLoopInvariant(Op))
          continue;
        if (llvm::any_of(L->getSubLoops(), [Inst](const Loop *SubLoop) {
              return SubLoop->contains(Inst); }))
          continue;
        HasLoopDef = true;
        break;
      }
      if (!HasLoopDef)
        continue;

      UP.Threshold = Threshold;
      LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
                        << " for loop:\n"
                        << *L << " due to " << *GEP << '\n');
      if (UP.Threshold >= MaxBoost)
        return;
    }

    // If we got a GEP in a small BB from inner loop then increase max trip
    // count to analyze for better estimation cost in unroll.
    if (L->isInnermost() && BB->size() < UnrollMaxBlockToAnalyze)
      UP.MaxIterationsCountToAnalyze = 32;
  }
}
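// Worked example (illustrative, using the default knob values above): a loop
// starts at UP.Threshold = 300; two conditional branches whose conditions
// trace back to loop PHIs add 2 * UnrollThresholdIf = 400, giving 700; a GEP
// into a static private alloca of at most MaxAlloca = (256 - 16) * 4 = 960
// bytes then raises the threshold to ThresholdPrivate = 2700, which is also
// MaxBoost for this configuration.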
static const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureUseFlatForGlobal,
    AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,

    // The default assumption needs to be ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features.
    AMDGPU::FeatureFastFMAF32, AMDGPU::FeatureHalfRate64Ops};
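// These bits are masked out in areInlineCompatible() below: they are tuning
// or scheduling choices that do not change the ABI or the set of legal
// instructions, so a caller/callee mismatch on them is safe to inline across.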
309 TLI(ST->getTargetLowering()), CommonTTI(TM,
F),
310 IsGraphics(
AMDGPU::isGraphics(
F.getCallingConv())) {
313 HasFP64FP16Denormals =
318 return !
F || !ST->isSingleLaneExecution(*
F);
unsigned GCNTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
  if (Opcode == Instruction::Load || Opcode == Instruction::Store)
    return 32 * 4 / ElemWidth;

  return (ElemWidth == 8 && ST->has16BitInsts())       ? 4
         : (ElemWidth == 16 && ST->has16BitInsts())    ? 2
         : (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
                                                       : 1;
}
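// Example (illustrative): for a 16-bit element load, the maximum VF is
// 32 * 4 / 16 = 8, i.e. the vectorizer may form an <8 x i16>/<8 x half>
// access that still fits the 128-bit (4-dword) memory path.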
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                         unsigned ChainSizeInBytes,
                                         VectorType *VecTy) const {
  unsigned VecRegBitWidth = VF * LoadSize;
  if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
    // TODO: Support element-size less than 32bit?
    return 128 / LoadSize;
  return VF;
}

unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                          unsigned ChainSizeInBytes,
                                          VectorType *VecTy) const {
  unsigned VecRegBitWidth = VF * StoreSize;
  if (VecRegBitWidth > 128)
    return 128 / StoreSize;
  return VF;
}
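// Example (illustrative): a chain of 16 x i16 loads gives VecRegBitWidth =
// 16 * 16 = 256 > 128 with sub-dword elements, so the load factor is capped
// at 128 / 16 = 8; a chain of 8 x i32 stores (256 bits) is capped at
// 128 / 32 = 4 regardless of element width.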
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  // ...
  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return 8 * ST->getMaxPrivateElementSize();
  // ...
}
bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                            Align Alignment,
                                            unsigned AddrSpace) const {
  // We allow vectorization of flat stores, even though we may need to
  // decompose them later if they may access private memory. We don't have
  // enough context here, and legalization can handle it.
  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return (Alignment >= 4 || ST->hasUnalignedScratchAccessEnabled()) &&
           ChainSizeInBytes <= ST->getMaxPrivateElementSize();
  return true;
}

bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                             Align Alignment,
                                             unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                              Align Alignment,
                                              unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
Type *GCNTTIImpl::getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicElementSize) const {
  if (AtomicElementSize)
    return Type::getIntNTy(Context, *AtomicElementSize * 8);

  // ...
  // Each loop iteration copies MemcpyLoopUnroll groups of 4 x 32-bit
  // elements.
  unsigned I32EltsInVector = 4;
  return FixedVectorType::get(Type::getInt32Ty(Context),
                              MemcpyLoopUnroll * I32EltsInVector);
}
void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicCpySize) const {
  if (AtomicCpySize) {
    BaseT::getMemcpyLoopResidualLoweringType(
        OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
        DestAlign, AtomicCpySize);
    return;
  }

  Align MinAlign = std::min(SrcAlign, DestAlign);
  if (MinAlign != Align(2)) {
    Type *I64x2Ty = FixedVectorType::get(Type::getInt64Ty(Context), 2);
    while (RemainingBytes >= 16) {
      OpsOut.push_back(I64x2Ty);
      RemainingBytes -= 16;
    }

    Type *I64Ty = Type::getInt64Ty(Context);
    while (RemainingBytes >= 8) {
      OpsOut.push_back(I64Ty);
      RemainingBytes -= 8;
    }
  }

  Type *I32Ty = Type::getInt32Ty(Context);
  while (RemainingBytes >= 4) {
    OpsOut.push_back(I32Ty);
    RemainingBytes -= 4;
  }

  Type *I16Ty = Type::getInt16Ty(Context);
  while (RemainingBytes >= 2) {
    OpsOut.push_back(I16Ty);
    RemainingBytes -= 2;
  }

  Type *I8Ty = Type::getInt8Ty(Context);
  while (RemainingBytes) {
    OpsOut.push_back(I8Ty);
    --RemainingBytes;
  }
}
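// Example (illustrative): with 13 residual bytes and a common alignment other
// than 2, the loops above emit an i64, then an i32, then an i8 access
// (8 + 4 + 1 = 13 bytes).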
bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                    MemIntrinsicInfo &Info) const {
  switch (Inst->getIntrinsicID()) {
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
    auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
    auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
    if (!Ordering || !Volatile)
      return false; // Invalid.

    unsigned OrderingVal = Ordering->getZExtValue();
    if (OrderingVal >
        static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent))
      return false;

    Info.PtrVal = Inst->getArgOperand(0);
    Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
    Info.ReadMem = true;
    Info.WriteMem = true;
    Info.IsVolatile = !Volatile->isZero();
    return true;
  }
  default:
    return false;
  }
}
537 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
538 int ISD = TLI->InstructionOpcodeToISD(Opcode);
542 unsigned NElts = LT.second.isVector() ?
543 LT.second.getVectorNumElements() : 1;
552 return get64BitInstrCost(
CostKind) * LT.first * NElts;
554 if (ST->has16BitInsts() && SLT == MVT::i16)
555 NElts = (NElts + 1) / 2;
558 return getFullRateInstrCost() * LT.first * NElts;
564 if (SLT == MVT::i64) {
566 return 2 * getFullRateInstrCost() * LT.first * NElts;
569 if (ST->has16BitInsts() && SLT == MVT::i16)
570 NElts = (NElts + 1) / 2;
572 return LT.first * NElts * getFullRateInstrCost();
574 const int QuarterRateCost = getQuarterRateInstrCost(
CostKind);
575 if (SLT == MVT::i64) {
576 const int FullRateCost = getFullRateInstrCost();
577 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
580 if (ST->has16BitInsts() && SLT == MVT::i16)
581 NElts = (NElts + 1) / 2;
584 return QuarterRateCost * NElts * LT.first;
592 const int OPC = TLI->InstructionOpcodeToISD(
FAdd->getOpcode());
594 if (ST->hasMadMacF32Insts() && SLT == MVT::f32 && !HasFP32Denormals)
596 if (ST->has16BitInsts() && SLT == MVT::f16 && !HasFP64FP16Denormals)
609 if (ST->hasPackedFP32Ops() && SLT == MVT::f32)
610 NElts = (NElts + 1) / 2;
611 if (ST->hasBF16PackedInsts() && SLT == MVT::bf16)
612 NElts = (NElts + 1) / 2;
614 return LT.first * NElts * get64BitInstrCost(
CostKind);
616 if (ST->has16BitInsts() && SLT == MVT::f16)
617 NElts = (NElts + 1) / 2;
619 if (SLT == MVT::f32 || SLT == MVT::f16 || SLT == MVT::bf16)
620 return LT.first * NElts * getFullRateInstrCost();
626 if (SLT == MVT::f64) {
631 if (!ST->hasUsableDivScaleConditionOutput())
632 Cost += 3 * getFullRateInstrCost();
634 return LT.first *
Cost * NElts;
639 if ((SLT == MVT::f32 && !HasFP32Denormals) ||
640 (SLT == MVT::f16 && ST->has16BitInsts())) {
641 return LT.first * getTransInstrCost(
CostKind) * NElts;
645 if (SLT == MVT::f16 && ST->has16BitInsts()) {
651 int Cost = 4 * getFullRateInstrCost() + 2 * getTransInstrCost(
CostKind);
652 return LT.first *
Cost * NElts;
659 int Cost = getTransInstrCost(
CostKind) + getFullRateInstrCost();
660 return LT.first *
Cost * NElts;
663 if (SLT == MVT::f32 || SLT == MVT::f16) {
665 int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
668 if (!HasFP32Denormals) {
670 Cost += 2 * getFullRateInstrCost();
673 return LT.first * NElts *
Cost;
679 return TLI->isFNegFree(SLT) ? 0 : NElts;
static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::copysign:
  case Intrinsic::minimumnum:
  case Intrinsic::maximumnum:
  case Intrinsic::canonicalize:
  // There is a small benefit to using vector ops in the legalized code.
  case Intrinsic::round:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::abs:
    return true;
  default:
    return false;
  }
}
InstructionCost
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) const {
  switch (ICA.getID()) {
  case Intrinsic::fabs:
    // Free source modifier in the common case.
    return 0;
  case Intrinsic::amdgcn_workitem_id_x:
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::amdgcn_workitem_id_z:
    // TODO: If hasPackedTID, or if the calling context is not an entry point
    // there may be a bit instruction.
    return 0;
  case Intrinsic::amdgcn_workgroup_id_x:
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::amdgcn_lds_kernel_id:
  case Intrinsic::amdgcn_dispatch_ptr:
  case Intrinsic::amdgcn_dispatch_id:
  case Intrinsic::amdgcn_implicitarg_ptr:
  case Intrinsic::amdgcn_queue_ptr:
    // Read from an argument register.
    return 0;
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::exp10: {
    Type *RetTy = ICA.getReturnType();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
    unsigned NElts =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
    Intrinsic::ID IID = ICA.getID();

    if (SLT == MVT::f64) {
      if (IID == Intrinsic::exp) {
        // ...
      } else if (IID == Intrinsic::exp10) {
        // ...
      }
      // ...
    }

    if (SLT == MVT::f32) {
      unsigned NumFullRateOps = 0;
      // One hardware transcendental (exp2) plus a fixup sequence.
      unsigned NumTransOps = 1;
      // ...
      NumFullRateOps = ST->hasFastFMAF32() ? 13 : 17;
      if (IID == Intrinsic::exp) {
        // ...
      } else if (IID == Intrinsic::exp10) {
        // ...
      }
      if (HasFP32Denormals) {
        // ...
      }
      InstructionCost Cost = NumFullRateOps * getFullRateInstrCost() +
                             NumTransOps * getTransInstrCost(CostKind);
      return LT.first * NElts * Cost;
    }
    break;
  }
  case Intrinsic::log:
  case Intrinsic::log2:
  case Intrinsic::log10: {
    Type *RetTy = ICA.getReturnType();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
    unsigned NElts =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
    Intrinsic::ID IID = ICA.getID();

    if (SLT == MVT::f32) {
      unsigned NumFullRateOps = 0;
      // ...
      if (IID == Intrinsic::log2) {
        // ...
      } else {
        NumFullRateOps = ST->hasFastFMAF32() ? 8 : 11;
      }
      if (HasFP32Denormals) {
        // ...
      }
      InstructionCost Cost =
          NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(CostKind);
      return LT.first * NElts * Cost;
    }
    break;
  }
  case Intrinsic::sin:
  case Intrinsic::cos: {
    Type *RetTy = ICA.getReturnType();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
    unsigned NElts =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;

    if (SLT == MVT::f32) {
      // Range reduction plus the transcendental itself.
      unsigned NumFullRateOps = ST->hasTrigReducedRange() ? 2 : 1;
      InstructionCost Cost =
          NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(CostKind);
      return LT.first * NElts * Cost;
    }
    break;
  }
  case Intrinsic::sqrt: {
    Type *RetTy = ICA.getReturnType();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
    unsigned NElts =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;

    if (SLT == MVT::f32) {
      unsigned NumFullRateOps = 0;
      // ...
      NumFullRateOps = HasFP32Denormals ? 17 : 16;
      InstructionCost Cost =
          NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(CostKind);
      return LT.first * NElts * Cost;
    }
    break;
  }
  default:
    break;
  }

  if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
    return BaseT::getIntrinsicInstrCost(ICA, CostKind);

  Type *RetTy = ICA.getReturnType();

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);

  unsigned NElts = LT.second.isVector() ? LT.second.getVectorNumElements() : 1;

  MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;

  if ((ST->hasVOP3PInsts() &&
       (SLT == MVT::f16 || SLT == MVT::i16 ||
        (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) ||
      (ST->hasPackedFP32Ops() && SLT == MVT::f32))
    NElts = (NElts + 1) / 2;

  // TODO: Get more refined intrinsic costs?
  unsigned InstRate = getQuarterRateInstrCost(CostKind);

  switch (ICA.getID()) {
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
    if (SLT == MVT::f64) {
      InstRate = get64BitInstrCost(CostKind);
      break;
    }

    if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
      InstRate = getFullRateInstrCost();
    else {
      InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(CostKind)
                                     : getQuarterRateInstrCost(CostKind);
    }
    break;
  case Intrinsic::copysign:
    return NElts * getFullRateInstrCost();
  case Intrinsic::minimumnum:
  case Intrinsic::maximumnum: {
    // Instruction plus two canonicalizes (or the type promotion taking the
    // place of the canonicalize).
    unsigned NumOps = 3;
    // ...
    unsigned BaseRate =
        SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
    InstRate = BaseRate * NumOps;
    break;
  }
  case Intrinsic::canonicalize: {
    InstRate =
        SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
    break;
  }
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat: {
    if (SLT == MVT::i16 || SLT == MVT::i32)
      InstRate = getFullRateInstrCost();

    static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      NElts = 1;
    break;
  }
  case Intrinsic::abs:
    // Expansion takes 2 instructions for VALU.
    if (SLT == MVT::i16 || SLT == MVT::i32)
      InstRate = 2 * getFullRateInstrCost();
    break;
  default:
    break;
  }

  return LT.first * NElts * InstRate;
}
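// Worked example (illustrative): fma on <2 x half> with VOP3P legalizes to a
// single packed v_pk_fma_f16, so NElts is halved from 2 to 1 and the result
// is LT.first * 1 * getFullRateInstrCost().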
InstructionCost GCNTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  const bool SCost =
      (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency);
  const int CBrCost = SCost ? 5 : 7;
  switch (Opcode) {
  case Instruction::UncondBr:
    // Branch instruction takes about 4 slots on gfx900.
    return SCost ? 1 : 4;
  case Instruction::CondBr:
    // Suppose conditional branch takes additional 3 exec manipulations if not
    // another branch inside.
    return CBrCost;
  case Instruction::Switch: {
    const auto *SI = dyn_cast_or_null<SwitchInst>(I);
    // Each case (including default) takes 1 cmp + 1 cbr instructions on
    // average.
    return (SI ? (SI->getNumCases() + 1) : 4) * (CBrCost + 1);
  }
  case Instruction::Ret:
    return SCost ? 1 : 10;
  }
  return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
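// Example (illustrative): at a throughput cost kind a switch with three cases
// costs (3 + 1) * (7 + 1) = 32, i.e. one compare plus one conditional branch
// per case, counting the default.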
InstructionCost
GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                       std::optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) const {
  if (TTI::requiresOrderedReduction(FMF))
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  EVT OrigTy = TLI->getValueType(DL, Ty);

  // Computes cost on targets that have packed math instructions (which
  // support 16-bit types only).
  if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  return LT.first * getFullRateInstrCost();
}

InstructionCost
GCNTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                   FastMathFlags FMF,
                                   TTI::TargetCostKind CostKind) const {
  EVT OrigTy = TLI->getValueType(DL, Ty);

  // Computes cost on targets that have packed math instructions (which
  // support 16-bit types only).
  if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
    return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  return LT.first * getHalfRateInstrCost(CostKind);
}
InstructionCost GCNTTIImpl::getVectorInstrCost(
    unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index,
    const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {
  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::InsertElement: {
    unsigned EltSize =
        DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
    if (EltSize < 32) {
      // A 16-bit access to element 0 is a plain subregister use.
      if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
        return 0;
      return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
                                       Op1, VIC);
    }

    // Extracts are just reads of a subregister, so are free. Inserts are
    // considered free because we don't want to have any cost for scalarizing
    // operations, and we don't have to copy into a different register class.

    // Dynamic indexing isn't free and is best avoided.
    return Index == ~0u ? 2 : 0;
  }
  default:
    return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1,
                                     VIC);
  }
}
bool GCNTTIImpl::isInlineAsmSourceOfDivergence(
    const CallInst *CI, ArrayRef<unsigned> Indices) const {
  // TODO: Handle complex extract indices.
  if (Indices.size() > 1)
    return true;

  const DataLayout &DL = CI->getDataLayout();
  const SIRegisterInfo *TRI = ST->getRegisterInfo();
  TargetLowering::AsmOperandInfoVector TargetConstraints =
      TLI->ParseConstraints(DL, ST->getRegisterInfo(), *CI);

  const int TargetOutputIdx = Indices.empty() ? -1 : Indices[0];

  int OutputIdx = 0;
  for (auto &TC : TargetConstraints) {
    if (TC.Type != InlineAsm::isOutput)
      continue;

    // Skip outputs we don't care about.
    if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)
      continue;

    TLI->ComputeConstraintToUse(TC, SDValue());

    const TargetRegisterClass *RC = TLI->getRegForInlineAsmConstraint(
        TRI, TC.ConstraintCode, TC.ConstraintVT).second;

    // For AGPR constraints null is returned on subtargets without AGPRs, so
    // assume divergent for null.
    if (!RC || !TRI->isSGPRClass(RC))
      return true;
  }

  return false;
}
bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
  if (const Argument *A = dyn_cast<Argument>(V))
    return !AMDGPU::isArgPassedInSGPR(A);

  // ...

  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
    Intrinsic::ID IID = Intrinsic->getIntrinsicID();
    switch (IID) {
    case Intrinsic::read_register:
      return isReadRegisterSourceOfDivergence(Intrinsic);
    case Intrinsic::amdgcn_addrspacecast_nonnull: {
      unsigned SrcAS =
          Intrinsic->getOperand(0)->getType()->getPointerAddressSpace();
      unsigned DstAS = Intrinsic->getType()->getPointerAddressSpace();
      // Private-to-flat casts expose per-lane addresses when scratch is
      // globally addressable.
      return SrcAS == AMDGPUAS::PRIVATE_ADDRESS &&
             DstAS == AMDGPUAS::FLAT_ADDRESS &&
             ST->hasGloballyAddressableScratch();
    }
    case Intrinsic::amdgcn_workitem_id_y:
    case Intrinsic::amdgcn_workitem_id_z: {
      // The Y/Z workitem IDs are uniform if the required workgroup size in
      // that dimension is 1, or if wavefronts are known not to straddle it.
      const Function *F = Intrinsic->getFunction();
      bool HasUniformYZ = ST->hasWavefrontsEvenlySplittingXDim(
          *F, /*REquiresUniformYZ=*/true);
      std::optional<unsigned> ThisDimSize = ST->getReqdWorkGroupSize(
          *F, IID == Intrinsic::amdgcn_workitem_id_y ? 1 : 2);
      return !HasUniformYZ && (!ThisDimSize || *ThisDimSize != 1);
    }
    default:
      return AMDGPU::isIntrinsicSourceOfDivergence(IID);
    }
  }

  // Assume all function calls are a source of divergence.
  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
    if (CI->isInlineAsm())
      return isInlineAsmSourceOfDivergence(CI);
    return true;
  }

  // Flat addresses of private objects likewise differ per lane.
  if (const auto *CastI = dyn_cast<AddrSpaceCastInst>(V)) {
    return CastI->getSrcAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
           CastI->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS &&
           ST->hasGloballyAddressableScratch();
  }

  // ...
  return false;
}
bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
    if (CI->isInlineAsm())
      return !isInlineAsmSourceOfDivergence(CI);
    return false;
  }

  // The upper bits of workitem_id_x are uniform across a wave when wavefronts
  // do not straddle the X dimension of the workgroup.
  bool XDimDoesntResetWithinWaves = false;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    const Function *F = I->getFunction();
    XDimDoesntResetWithinWaves = ST->hasWavefrontsEvenlySplittingXDim(*F);
  }

  using namespace llvm::PatternMatch;
  uint64_t C;
  if (match(V, m_LShr(m_Intrinsic<Intrinsic::amdgcn_workitem_id_x>(),
                      m_ConstantInt(C))) ||
      match(V, m_AShr(m_Intrinsic<Intrinsic::amdgcn_workitem_id_x>(),
                      m_ConstantInt(C))))
    return C >= ST->getWavefrontSizeLog2() && XDimDoesntResetWithinWaves;

  Value *Mask;
  if (match(V, m_c_And(m_Intrinsic<Intrinsic::amdgcn_workitem_id_x>(),
                       m_Value(Mask)))) {
    // The result is uniform if the mask clears at least the low
    // wavefront-size bits.
    // ...
    return computeKnownBits(Mask, DL).countMinTrailingZeros() >=
               ST->getWavefrontSizeLog2() &&
           XDimDoesntResetWithinWaves;
  }

  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
    if (const IntrinsicInst *Intrinsic =
            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
      switch (Intrinsic->getIntrinsicID()) {
      default:
        return false;
      case Intrinsic::amdgcn_if:
      case Intrinsic::amdgcn_else: {
        ArrayRef<unsigned> Indices = ExtValue->getIndices();
        // The second element of the {value, exec} pair is always uniform.
        return Indices.size() == 1 && Indices[0] == 1;
      }
      }
    }
  }

  return false;
}
bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                            Intrinsic::ID IID) const {
  switch (IID) {
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
  case Intrinsic::amdgcn_flat_atomic_fmax_num:
  case Intrinsic::amdgcn_flat_atomic_fmin_num:
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_make_buffer_rsrc:
    OpIndexes.push_back(0);
    return true;
  default:
    return false;
  }
}
Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                    Value *OldV,
                                                    Value *NewV) const {
  auto IntrID = II->getIntrinsicID();
  switch (IntrID) {
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private: {
    unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
      AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
    unsigned NewAS = NewV->getType()->getPointerAddressSpace();
    LLVMContext &Ctx = NewV->getType()->getContext();
    ConstantInt *NewVal = (TrueAS == NewAS) ?
      ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
    return NewVal;
  }
  case Intrinsic::amdgcn_flat_atomic_fmax_num:
  case Intrinsic::amdgcn_flat_atomic_fmin_num: {
    Type *DestTy = II->getType();
    Type *SrcTy = NewV->getType();
    unsigned NewAS = SrcTy->getPointerAddressSpace();
    if (!AMDGPU::isExtendedGlobalAddrSpace(NewAS))
      return nullptr;
    Module *M = II->getModule();
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        M, II->getIntrinsicID(), {DestTy, SrcTy, DestTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return II;
  }
  case Intrinsic::amdgcn_load_to_lds: {
    Type *SrcTy = NewV->getType();
    Module *M = II->getModule();
    Function *NewDecl =
        Intrinsic::getOrInsertDeclaration(M, II->getIntrinsicID(), {SrcTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return II;
  }
  case Intrinsic::amdgcn_make_buffer_rsrc: {
    Type *SrcTy = NewV->getType();
    Type *DstTy = II->getType();
    Module *M = II->getModule();
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        M, II->getIntrinsicID(), {DstTy, SrcTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return II;
  }
  default:
    return nullptr;
  }
}
InstructionCost GCNTTIImpl::getShuffleCost(
    TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
    ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
    VectorType *SubTp, ArrayRef<const Value *> Args,
    const Instruction *CxtI) const {
  // ...
  // Sub-dword (8- and 16-bit) shuffles are modelled in units of the 32-bit
  // registers the hardware packs them into.
  unsigned ScalarSize = DL.getTypeSizeInBits(SrcTy->getElementType());
  if (/* ... */ (ScalarSize == 16 || ScalarSize == 8)) {
    // ...
    auto *SrcVecTy = cast<FixedVectorType>(SrcTy);
    auto *DstVecTy = cast<FixedVectorType>(DstTy);
    unsigned NumSrcElts = SrcVecTy->getNumElements();

    // A shuffle of a single packed 16-bit pair is one VOP3P instruction.
    if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumSrcElts == 2 &&
        /* ... */) {
      // ...
    }

    unsigned EltsPerReg = 32 / ScalarSize;

    // Subvector extracts: a register-aligned extract is free; otherwise pay
    // roughly one instruction per destination register.
    //   ...
    //   return divideCeil(DstVecTy->getNumElements(), EltsPerReg);
    //   ...
    //   if (Index % EltsPerReg == 0)
    //     ... // aligned
    //   return divideCeil(DstVecTy->getNumElements(), EltsPerReg);

    // Subvector inserts: registers where the inserted range begins or ends
    // mid-register must be blended with the old contents.
    //   unsigned NumDstElts = DstVecTy->getNumElements();
    //   unsigned EndIndex = Index + NumInsertElts;
    //   unsigned BeginSubIdx = Index % EltsPerReg;
    //   unsigned EndSubIdx = EndIndex % EltsPerReg;
    //   if (BeginSubIdx != 0) { ... }
    //   if (EndIndex < NumDstElts && BeginSubIdx < EndSubIdx) { ... }

    // Concatenation-style two-source shuffles: aligned halves are free,
    // otherwise pay for the registers that straddle the seam.
    //   unsigned NumElts = DstVecTy->getNumElements();
    //   unsigned EltsFromLHS = NumElts - Index;
    //   bool LHSIsAligned = (Index % EltsPerReg) == 0;
    //   bool RHSIsAligned = (EltsFromLHS % EltsPerReg) == 0;
    //   if (LHSIsAligned && RHSIsAligned)
    //     return 0;
    //   if (LHSIsAligned && !RHSIsAligned)
    //     return divideCeil(NumElts, EltsPerReg) - (EltsFromLHS / EltsPerReg);
    //   if (!LHSIsAligned && RHSIsAligned) { ... }

    // Generic masks: for each destination register, count the distinct
    // source registers feeding it; a destination assembled from two or more
    // source registers needs a permute.
    if (!Mask.empty()) {
      // ...
      for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx += EltsPerReg) {
        SmallDenseSet<unsigned, 4> Regs;
        for (unsigned I = 0; I < EltsPerReg && DstIdx + I < Mask.size();
             ++I) {
          int SrcIdx = Mask[DstIdx + I];
          // ...
          unsigned Reg;
          if (SrcIdx < (int)NumSrcElts) {
            Reg = SrcIdx / EltsPerReg;
            if (SrcIdx % EltsPerReg != I) {
              // Element moves within its register: needs a permute.
              // ...
            }
          } else {
            Reg = NumSrcElts + (SrcIdx - NumSrcElts) / EltsPerReg;
            if ((SrcIdx - NumSrcElts) % EltsPerReg != I) {
              // ...
            }
          }
          Regs.insert(Reg);
        }
        if (Regs.size() >= 2) {
          // ...
        }
      }
      // ...
    }
  }
  // ...
}
bool GCNTTIImpl::isProfitableToSinkOperands(Instruction *I,
                                            SmallVectorImpl<Use *> &Ops) const {
  for (auto &Op : I->operands()) {
    // ...
    // Sunk fneg/fabs fold into source modifiers for free.
    if (match(&Op, m_FAbs(m_Value())) || match(&Op, m_FNeg(m_Value())))
      Ops.push_back(&Op);

    auto *OpInst = dyn_cast<Instruction>(Op.get());
    // ...
    // Zero-cost vector element accesses are also worth sinking.
    if (OpInst->getType()->isVectorTy() && OpInst->getNumOperands() > 1) {
      // ...
      if (VecOpInst && VecOpInst->hasOneUse()) {
        // ...
      }
      if (/* ... */ getVectorInstrCost(OpInst->getOpcode(), OpInst->getType(),
                                       /* ... */,
                                       OpInst->getOperand(0),
                                       OpInst->getOperand(1)) == 0) {
        // ...
      }
    }

    // Free sub-dword shuffles (those that keep 16-bit elements within their
    // 32-bit registers) are profitable to sink as well.
    unsigned EltSize = DL.getTypeSizeInBits(
        cast<VectorType>(Op->getType())->getElementType());
    if (EltSize < 16 || !ST->has16BitInsts())
      continue;

    auto *Shuffle = dyn_cast<ShuffleVectorInst>(OpInst);
    // ...
    int NumSubElts, SubIndex;
    if (Shuffle->changesLength()) {
      if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) {
        // ...
      }
      // ...
    }

    if ((Shuffle->isExtractSubvectorMask(SubIndex) ||
         Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) &&
        !(SubIndex & 0x1)) {
      // Even (register-aligned) subvector positions are free.
      // ...
    }

    if (Shuffle->isReverse() || Shuffle->isZeroEltSplat() ||
        Shuffle->isSingleSource()) {
      // ...
    }
  }

  return !Ops.empty();
}
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  // ...
  const FeatureBitset &CallerBits = CallerST->getFeatureBits();
  const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();

  FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
    return false;

  // FP mode should match for inlining.
  // ...

  if (Callee->hasFnAttribute(Attribute::AlwaysInline) ||
      Callee->hasFnAttribute(Attribute::InlineHint))
    return true;

  // Hack to make compile times reasonable.
  if (InlineMaxBB) {
    // Single BB does not increase total BB amount.
    if (Callee->size() == 1)
      return true;
    size_t BBSize = Caller->size() + Callee->size() - 1;
    return BBSize <= InlineMaxBB;
  }

  return true;
}
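// Example (illustrative): with the default -amdgpu-inline-max-bb=1100, a
// caller with 600 blocks cannot inline a 600-block callee
// (600 + 600 - 1 = 1199 > 1100) unless the callee is single-block or carries
// alwaysinline/inlinehint.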
static unsigned adjustInliningThresholdUsingCallee(const CallBase *CB,
                                                   const SITargetLowering *TLI,
                                                   const GCNTTIImpl *TTIImpl) {
  const int NrOfSGPRUntilSpill = 26;
  const int NrOfVGPRUntilSpill = 32;

  const DataLayout &DL = TTIImpl->getDataLayout();

  unsigned adjustThreshold = 0;
  int SGPRsInUse = 0;
  int VGPRsInUse = 0;
  for (const Use &A : CB->args()) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, DL, A.get()->getType(), ValueVTs);
    for (auto ArgVT : ValueVTs) {
      unsigned CCRegNum = TLI->getNumRegistersForCallingConv(
          CB->getContext(), CB->getCallingConv(), ArgVT);
      if (AMDGPU::isArgPassedInSGPR(CB, CB->getArgOperandNo(&A)))
        SGPRsInUse += CCRegNum;
      else
        VGPRsInUse += CCRegNum;
    }
  }

  // The cost of passing a function argument through the stack:
  // 1 instruction to put it on the stack and 1 to load it back.
  InstructionCost ArgStackCost(1);
  ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
      Instruction::Store, Type::getInt32Ty(CB->getContext()), Align(4),
      AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
  ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
      Instruction::Load, Type::getInt32Ty(CB->getContext()), Align(4),
      AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);

  // The penalty cost is computed relative to the cost of instructions and
  // does not model any storage costs.
  adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
                     ArgStackCost.getValue() * InlineConstants::getInstrCost();
  adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
                     ArgStackCost.getValue() * InlineConstants::getInstrCost();
  return adjustThreshold;
}
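// Example (illustrative): a call site whose arguments need 30 SGPRs and 40
// VGPRs exceeds the spill-free budgets (26 and 32) by 4 and 8 registers
// respectively, raising the inlining threshold by
// (4 + 8) * ArgStackCost * InlineConstants::getInstrCost() to account for
// the stack traffic that inlining would eliminate.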
static unsigned getCallArgsTotalAllocaSize(const CallBase *CB,
                                           const DataLayout &DL) {
  // If we have a pointer to a private array passed into a function
  // it will not be optimized out, leaving scratch usage.
  unsigned AllocaSize = 0;
  SmallPtrSet<const AllocaInst *, 8> AIVisited;
  for (Value *PtrArg : CB->args()) {
    PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
    if (!Ty)
      continue;

    unsigned AddrSpace = Ty->getAddressSpace();
    if (AddrSpace != AMDGPUAS::FLAT_ADDRESS &&
        AddrSpace != AMDGPUAS::PRIVATE_ADDRESS)
      continue;

    const AllocaInst *AI = dyn_cast<AllocaInst>(getUnderlyingObject(PtrArg));
    if (!AI || !AI->isStaticAlloca() || !AIVisited.insert(AI).second)
      continue;
    std::optional<TypeSize> Size = AI->getAllocationSize(DL);
    if (Size)
      AllocaSize += Size->getFixedValue();
  }
  return AllocaSize;
}
unsigned GCNTTIImpl::getCallerAllocaCost(const CallBase *CB,
                                         const AllocaInst *AI) const {
  // Below the cutoff, assume that the private memory objects would be
  // optimized away.
  auto AllocaSize = getCallArgsTotalAllocaSize(CB, DL);
  if (AllocaSize <= ArgAllocaCutoff)
    return 0;

  // Above the cutoff, attribute the ArgAllocaCost bonus proportionally to
  // each alloca's share of the total size, keeping track of the
  // threshold-multiplier and single-BB bonus the inliner will apply.
  static_assert(InlinerVectorBonusPercent == 0,
                "vector bonus assumed to be 0");
  unsigned Threshold = ArgAllocaCost * getInliningThresholdMultiplier();

  bool SingleBB = none_of(*CB->getCalledFunction(), [](const BasicBlock &BB) {
    return BB.getTerminator()->getNumSuccessors() > 1;
  });
  if (SingleBB)
    Threshold += Threshold / 2;

  auto ArgAllocaSize = AI->getAllocationSize(DL);

  // Attribute the bonus proportionally to the alloca size.
  unsigned AllocaThresholdBonus =
      (Threshold * ArgAllocaSize->getFixedValue()) / AllocaSize;

  return AllocaThresholdBonus;
}
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP,
                                         OptimizationRemarkEmitter *ORE) const {
  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
}

void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                       TTI::PeelingPreferences &PP) const {
  CommonTTI.getPeelingPreferences(L, SE, PP);
}

// Transcendental ("trans") instructions execute at quarter rate.
int GCNTTIImpl::getTransInstrCost(TTI::TargetCostKind CostKind) const {
  return getQuarterRateInstrCost(CostKind);
}

int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const {
  return ST->hasFullRate64Ops()
             ? getFullRateInstrCost()
             : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
                                      : getQuarterRateInstrCost(CostKind);
}
1719std::pair<InstructionCost, MVT>
1720GCNTTIImpl::getTypeLegalizationCost(
Type *Ty)
const {
1722 auto Size =
DL.getTypeSizeInBits(Ty);
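// Example (illustrative): a 512-bit vector exceeds the assumed 8-dword limit,
// so its cost is bumped by (512 + 255) / 256 = 2 on top of the base
// legalization cost.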
1734 return ST->hasPrefetch() ? 128 : 0;
void GCNTTIImpl::collectKernelLaunchBounds(
    const Function &F,
    SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {
  SmallVector<unsigned> MaxNumWorkgroups = ST->getMaxNumWorkGroups(F);
  LB.push_back({"amdgpu-max-num-workgroups[0]", MaxNumWorkgroups[0]});
  LB.push_back({"amdgpu-max-num-workgroups[1]", MaxNumWorkgroups[1]});
  LB.push_back({"amdgpu-max-num-workgroups[2]", MaxNumWorkgroups[2]});
  std::pair<unsigned, unsigned> FlatWorkGroupSize =
      ST->getFlatWorkGroupSizes(F);
  LB.push_back({"amdgpu-flat-work-group-size[0]", FlatWorkGroupSize.first});
  LB.push_back({"amdgpu-flat-work-group-size[1]", FlatWorkGroupSize.second});
  std::pair<unsigned, unsigned> WavesPerEU = ST->getWavesPerEU(F);
  LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
  LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
}
1759 if (!ST->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1766 Attribute IEEEAttr =
F->getFnAttribute(
"amdgpu-ieee");
InstructionCost GCNTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            Align Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            TTI::OperandValueInfo OpInfo,
                                            const Instruction *I) const {
  if (VectorType *VecTy = dyn_cast<VectorType>(Src)) {
    // i8 vectors are packed into 32-bit registers; account for the reduced
    // number of memory operations.
    if ((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
        VecTy->getElementType()->isIntegerTy(8)) {
      // ...
    }
  }
  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
}

unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) const {
  // When counting parts, account for i8s being grouped together under a
  // single i32 value.
  if (auto *VecTy = dyn_cast<VectorType>(Tp)) {
    if (VecTy->getElementType()->isIntegerTy(8)) {
      // ...
    }
  }
  return BaseT::getNumberOfParts(Tp);
}
ValueUniformity GCNTTIImpl::getValueUniformity(const Value *V) const {
  if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_wave_shuffle:
      // Requires the per-argument check in isUniform() below.
      return ValueUniformity::Custom;
    default:
      break;
    }
  }
  if (isAlwaysUniform(V))
    return ValueUniformity::AlwaysUniform;
  if (isSourceOfDivergence(V))
    return ValueUniformity::NeverUniform;
  return ValueUniformity::Default;
}
InstructionCost GCNTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                 StackOffset BaseOffset,
                                                 bool HasBaseReg, int64_t Scale,
                                                 unsigned AddrSpace) const {
  if (HasBaseReg && Scale != 0) {
    // Subtargets with scale-offset addressing can fold base + index * scale
    // into the memory instruction directly.
    if (getST()->hasScaleOffset() && Ty && Ty->isSized() &&
        /* ... */) {
      return 0;
    }
    // ...
  }
  // ...
}
bool GCNTTIImpl::isLSRCostLess(const TTI::LSRCost &A,
                               const TTI::LSRCost &B) const {
  // ...
  unsigned EffInsnsA = A.Insns + A.ScaleCost;
  unsigned EffInsnsB = B.Insns + B.ScaleCost;

  return std::tie(EffInsnsA, A.NumIVMuls, A.AddRecCost, A.NumBaseAdds,
                  A.SetupCost, A.ImmCost, A.NumRegs) <
         std::tie(EffInsnsB, B.NumIVMuls, B.AddRecCost, B.NumBaseAdds,
                  B.SetupCost, B.ImmCost, B.NumRegs);
}
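// Design note: folding ScaleCost into the instruction count before the
// lexicographic comparison lets LSR solutions that rely on foldable scaled
// addressing compete on equal footing with plain ones, rather than being
// ranked by Insns and ScaleCost as separate tie-breakers.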
bool GCNTTIImpl::isUniform(const Instruction *I,
                           const SmallBitVector &UniformArgs) const {
  switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
  case Intrinsic::amdgcn_wave_shuffle:
    // The result is uniform if either the source value or the lane index is
    // uniform: a uniform value gives every lane the same data, and a uniform
    // index makes every lane read the same lane.
    return UniformArgs[0] || UniformArgs[1];
  default:
    // ...
    return false;
  }
}