29#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "AMDGPUtti"
39 "amdgpu-unroll-threshold-private",
40 cl::desc(
"Unroll threshold for AMDGPU if private memory used in a loop"),
44 "amdgpu-unroll-threshold-local",
45 cl::desc(
"Unroll threshold for AMDGPU if local memory used in a loop"),
49 "amdgpu-unroll-threshold-if",
50 cl::desc(
"Unroll threshold increment for AMDGPU for each if statement inside loop"),
54 "amdgpu-unroll-runtime-local",
55 cl::desc(
"Allow runtime unroll for AMDGPU if local memory used in a loop"),
59 "amdgpu-unroll-max-block-to-analyze",
60 cl::desc(
"Inner loop block size threshold to analyze in unroll for AMDGPU"),
65 cl::desc(
"Cost of alloca argument"));
73 cl::desc(
"Maximum alloca size to use for inline cost"));
78 cl::desc(
"Maximum number of BBs allowed in a function after inlining"
79 " (compile time constraint)"));
83 "amdgpu-memcpy-loop-unroll",
84 cl::desc(
"Unroll factor (affecting 4x32-bit operations) to use for memory "
85 "operations when lowering statically-sized memcpy, memmove, or"
95 for (
const Value *V :
I->operand_values()) {
100 return SubLoop->contains(PHI); }))
110 TargetTriple(TM->getTargetTriple()),
112 TLI(ST->getTargetLowering()) {}
117 const Function &
F = *L->getHeader()->getParent();
119 F.getFnAttributeAsParsedInteger(
"amdgpu-unroll-threshold", 300);
120 UP.
MaxCount = std::numeric_limits<unsigned>::max();
133 const unsigned MaxAlloca = (256 - 16) * 4;
139 if (
MDNode *LoopUnrollThreshold =
141 if (LoopUnrollThreshold->getNumOperands() == 2) {
143 LoopUnrollThreshold->getOperand(1));
144 if (MetaThresholdValue) {
150 ThresholdPrivate = std::min(ThresholdPrivate, UP.
Threshold);
151 ThresholdLocal = std::min(ThresholdLocal, UP.
Threshold);
156 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
159 unsigned LocalGEPsSeen = 0;
162 return SubLoop->contains(BB); }))
175 if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
176 (L->contains(Succ1) && L->isLoopExiting(Succ1)))
182 << *L <<
" due to " << *Br <<
'\n');
194 unsigned AS =
GEP->getAddressSpace();
195 unsigned Threshold = 0;
197 Threshold = ThresholdPrivate;
199 Threshold = ThresholdLocal;
207 const Value *Ptr =
GEP->getPointerOperand();
213 if (!AllocaSize || AllocaSize->getFixedValue() > MaxAlloca)
222 if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
227 << *L <<
" due to LDS use.\n");
232 bool HasLoopDef =
false;
235 if (!Inst || L->isLoopInvariant(
Op))
239 return SubLoop->contains(Inst); }))
263 << *L <<
" due to " << *
GEP <<
'\n');
286 AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,
287 AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureUseFlatForGlobal,
288 AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode,
290 AMDGPU::FeatureAutoWaitcntBeforeBarrier,
293 AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK,
294 AMDGPU::FeatureTrapHandler,
298 AMDGPU::FeatureSRAMECC,
301 AMDGPU::FeatureFastFMAF32, AMDGPU::FeatureHalfRate64Ops};
306 TLI(ST->getTargetLowering()), CommonTTI(TM,
F),
307 IsGraphics(
AMDGPU::isGraphics(
F.getCallingConv())) {
310 HasFP64FP16Denormals =
315 return !
F || !ST->isSingleLaneExecution(*
F);
347 if (Opcode == Instruction::Load || Opcode == Instruction::Store)
348 return 32 * 4 / ElemWidth;
351 return (ElemWidth == 8 && ST->has16BitInsts()) ? 4
352 : (ElemWidth == 16 && ST->has16BitInsts()) ? 2
353 : (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
358 unsigned ChainSizeInBytes,
360 unsigned VecRegBitWidth = VF * LoadSize;
363 return 128 / LoadSize;
369 unsigned ChainSizeInBytes,
371 unsigned VecRegBitWidth = VF * StoreSize;
372 if (VecRegBitWidth > 128)
373 return 128 / StoreSize;
389 return 8 * ST->getMaxPrivateElementSize();
397 unsigned AddrSpace)
const {
402 return (Alignment >= 4 || ST->hasUnalignedScratchAccessEnabled()) &&
403 ChainSizeInBytes <= ST->getMaxPrivateElementSize();
410 unsigned AddrSpace)
const {
416 unsigned AddrSpace)
const {
426 unsigned DestAddrSpace,
Align SrcAlign,
Align DestAlign,
427 std::optional<uint32_t> AtomicElementSize)
const {
429 if (AtomicElementSize)
443 unsigned I32EltsInVector = 4;
453 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
455 std::optional<uint32_t> AtomicCpySize)
const {
459 OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
460 DestAlign, AtomicCpySize);
463 while (RemainingBytes >= 16) {
465 RemainingBytes -= 16;
469 while (RemainingBytes >= 8) {
475 while (RemainingBytes >= 4) {
481 while (RemainingBytes >= 2) {
487 while (RemainingBytes) {
505 case Intrinsic::amdgcn_ds_ordered_add:
506 case Intrinsic::amdgcn_ds_ordered_swap: {
509 if (!Ordering || !Volatile)
512 unsigned OrderingVal = Ordering->getZExtValue();
519 Info.WriteMem =
true;
520 Info.IsVolatile = !Volatile->isZero();
534 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
535 int ISD = TLI->InstructionOpcodeToISD(Opcode);
539 unsigned NElts = LT.second.isVector() ?
540 LT.second.getVectorNumElements() : 1;
549 return get64BitInstrCost(
CostKind) * LT.first * NElts;
551 if (ST->has16BitInsts() && SLT == MVT::i16)
552 NElts = (NElts + 1) / 2;
555 return getFullRateInstrCost() * LT.first * NElts;
561 if (SLT == MVT::i64) {
563 return 2 * getFullRateInstrCost() * LT.first * NElts;
566 if (ST->has16BitInsts() && SLT == MVT::i16)
567 NElts = (NElts + 1) / 2;
569 return LT.first * NElts * getFullRateInstrCost();
571 const int QuarterRateCost = getQuarterRateInstrCost(
CostKind);
572 if (SLT == MVT::i64) {
573 const int FullRateCost = getFullRateInstrCost();
574 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
577 if (ST->has16BitInsts() && SLT == MVT::i16)
578 NElts = (NElts + 1) / 2;
581 return QuarterRateCost * NElts * LT.first;
589 const int OPC = TLI->InstructionOpcodeToISD(
FAdd->getOpcode());
591 if (ST->hasMadMacF32Insts() && SLT == MVT::f32 && !HasFP32Denormals)
593 if (ST->has16BitInsts() && SLT == MVT::f16 && !HasFP64FP16Denormals)
606 if (ST->hasPackedFP32Ops() && SLT == MVT::f32)
607 NElts = (NElts + 1) / 2;
608 if (ST->hasBF16PackedInsts() && SLT == MVT::bf16)
609 NElts = (NElts + 1) / 2;
611 return LT.first * NElts * get64BitInstrCost(
CostKind);
613 if (ST->has16BitInsts() && SLT == MVT::f16)
614 NElts = (NElts + 1) / 2;
616 if (SLT == MVT::f32 || SLT == MVT::f16 || SLT == MVT::bf16)
617 return LT.first * NElts * getFullRateInstrCost();
623 if (SLT == MVT::f64) {
628 if (!ST->hasUsableDivScaleConditionOutput())
629 Cost += 3 * getFullRateInstrCost();
631 return LT.first *
Cost * NElts;
636 if ((SLT == MVT::f32 && !HasFP32Denormals) ||
637 (SLT == MVT::f16 && ST->has16BitInsts())) {
638 return LT.first * getTransInstrCost(
CostKind) * NElts;
642 if (SLT == MVT::f16 && ST->has16BitInsts()) {
648 int Cost = 4 * getFullRateInstrCost() + 2 * getTransInstrCost(
CostKind);
649 return LT.first *
Cost * NElts;
656 int Cost = getTransInstrCost(
CostKind) + getFullRateInstrCost();
657 return LT.first *
Cost * NElts;
660 if (SLT == MVT::f32 || SLT == MVT::f16) {
662 int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
665 if (!HasFP32Denormals) {
667 Cost += 2 * getFullRateInstrCost();
670 return LT.first * NElts *
Cost;
676 return TLI->isFNegFree(SLT) ? 0 : NElts;
690 case Intrinsic::fmuladd:
691 case Intrinsic::copysign:
692 case Intrinsic::minimumnum:
693 case Intrinsic::maximumnum:
694 case Intrinsic::canonicalize:
696 case Intrinsic::round:
697 case Intrinsic::uadd_sat:
698 case Intrinsic::usub_sat:
699 case Intrinsic::sadd_sat:
700 case Intrinsic::ssub_sat:
711 switch (ICA.
getID()) {
712 case Intrinsic::fabs:
715 case Intrinsic::amdgcn_workitem_id_x:
716 case Intrinsic::amdgcn_workitem_id_y:
717 case Intrinsic::amdgcn_workitem_id_z:
721 case Intrinsic::amdgcn_workgroup_id_x:
722 case Intrinsic::amdgcn_workgroup_id_y:
723 case Intrinsic::amdgcn_workgroup_id_z:
724 case Intrinsic::amdgcn_lds_kernel_id:
725 case Intrinsic::amdgcn_dispatch_ptr:
726 case Intrinsic::amdgcn_dispatch_id:
727 case Intrinsic::amdgcn_implicitarg_ptr:
728 case Intrinsic::amdgcn_queue_ptr:
740 case Intrinsic::exp2:
741 case Intrinsic::exp10: {
743 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
746 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
748 if (SLT == MVT::f64) {
750 if (IID == Intrinsic::exp)
752 else if (IID == Intrinsic::exp10)
758 if (SLT == MVT::f32) {
759 unsigned NumFullRateOps = 0;
761 unsigned NumTransOps = 1;
767 NumFullRateOps = ST->hasFastFMAF32() ? 13 : 17;
769 if (IID == Intrinsic::exp) {
772 }
else if (IID == Intrinsic::exp10) {
778 if (HasFP32Denormals)
783 NumTransOps * getTransInstrCost(
CostKind);
784 return LT.first * NElts *
Cost;
790 case Intrinsic::log2:
791 case Intrinsic::log10: {
792 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
795 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
797 if (SLT == MVT::f32) {
798 unsigned NumFullRateOps = 0;
800 if (IID == Intrinsic::log2) {
808 NumFullRateOps = ST->hasFastFMAF32() ? 8 : 11;
811 if (HasFP32Denormals)
815 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
816 return LT.first * NElts *
Cost;
822 case Intrinsic::cos: {
823 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
826 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
828 if (SLT == MVT::f32) {
830 unsigned NumFullRateOps = ST->hasTrigReducedRange() ? 2 : 1;
833 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
834 return LT.first * NElts *
Cost;
839 case Intrinsic::sqrt: {
840 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
843 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
845 if (SLT == MVT::f32) {
846 unsigned NumFullRateOps = 0;
850 NumFullRateOps = HasFP32Denormals ? 17 : 16;
854 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
855 return LT.first * NElts *
Cost;
867 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
869 unsigned NElts = LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
871 if ((ST->hasVOP3PInsts() &&
872 (SLT == MVT::f16 || SLT == MVT::i16 ||
873 (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) ||
874 (ST->hasPackedFP32Ops() && SLT == MVT::f32))
875 NElts = (NElts + 1) / 2;
878 unsigned InstRate = getQuarterRateInstrCost(
CostKind);
880 switch (ICA.
getID()) {
882 case Intrinsic::fmuladd:
883 if (SLT == MVT::f64) {
884 InstRate = get64BitInstrCost(
CostKind);
888 if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
889 InstRate = getFullRateInstrCost();
891 InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(
CostKind)
892 : getQuarterRateInstrCost(
CostKind);
895 case Intrinsic::copysign:
896 return NElts * getFullRateInstrCost();
897 case Intrinsic::minimumnum:
898 case Intrinsic::maximumnum: {
910 SLT == MVT::f64 ? get64BitInstrCost(
CostKind) : getFullRateInstrCost();
911 InstRate = BaseRate *
NumOps;
914 case Intrinsic::canonicalize: {
916 SLT == MVT::f64 ? get64BitInstrCost(
CostKind) : getFullRateInstrCost();
919 case Intrinsic::uadd_sat:
920 case Intrinsic::usub_sat:
921 case Intrinsic::sadd_sat:
922 case Intrinsic::ssub_sat: {
923 if (SLT == MVT::i16 || SLT == MVT::i32)
924 InstRate = getFullRateInstrCost();
926 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
933 if (SLT == MVT::i16 || SLT == MVT::i32)
934 InstRate = 2 * getFullRateInstrCost();
940 return LT.first * NElts * InstRate;
946 assert((
I ==
nullptr ||
I->getOpcode() == Opcode) &&
947 "Opcode should reflect passed instruction.");
950 const int CBrCost = SCost ? 5 : 7;
952 case Instruction::UncondBr:
954 return SCost ? 1 : 4;
955 case Instruction::CondBr:
959 case Instruction::Switch: {
963 return (
SI ? (
SI->getNumCases() + 1) : 4) * (CBrCost + 1);
965 case Instruction::Ret:
966 return SCost ? 1 : 10;
973 std::optional<FastMathFlags> FMF,
978 EVT OrigTy = TLI->getValueType(
DL, Ty);
985 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
986 return LT.first * getFullRateInstrCost();
993 EVT OrigTy = TLI->getValueType(
DL, Ty);
1000 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1001 return LT.first * getHalfRateInstrCost(
CostKind);
1008 case Instruction::ExtractElement:
1009 case Instruction::InsertElement: {
1013 if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
1024 return Index == ~0u ? 2 : 0;
1039 if (Indices.
size() > 1)
1045 TLI->ParseConstraints(
DL, ST->getRegisterInfo(), *CI);
1047 const int TargetOutputIdx = Indices.
empty() ? -1 : Indices[0];
1050 for (
auto &TC : TargetConstraints) {
1055 if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)
1058 TLI->ComputeConstraintToUse(TC,
SDValue());
1061 TRI, TC.ConstraintCode, TC.ConstraintVT).second;
1065 if (!RC || !
TRI->isSGPRClass(RC))
1095bool GCNTTIImpl::isSourceOfDivergence(
const Value *V)
const {
1119 case Intrinsic::read_register:
1121 case Intrinsic::amdgcn_addrspacecast_nonnull: {
1123 Intrinsic->getOperand(0)->getType()->getPointerAddressSpace();
1124 unsigned DstAS =
Intrinsic->getType()->getPointerAddressSpace();
1127 ST->hasGloballyAddressableScratch();
1129 case Intrinsic::amdgcn_workitem_id_y:
1130 case Intrinsic::amdgcn_workitem_id_z: {
1135 *
F, IID == Intrinsic::amdgcn_workitem_id_y ? 1 : 2);
1136 return !HasUniformYZ && (!ThisDimSize || *ThisDimSize != 1);
1145 if (CI->isInlineAsm())
1160 ST->hasGloballyAddressableScratch();
1166bool GCNTTIImpl::isAlwaysUniform(
const Value *V)
const {
1171 if (CI->isInlineAsm())
1189 bool XDimDoesntResetWithinWaves =
false;
1192 XDimDoesntResetWithinWaves = ST->hasWavefrontsEvenlySplittingXDim(*
F);
1194 using namespace llvm::PatternMatch;
1200 return C >= ST->getWavefrontSizeLog2() && XDimDoesntResetWithinWaves;
1207 ST->getWavefrontSizeLog2() &&
1208 XDimDoesntResetWithinWaves;
1223 case Intrinsic::amdgcn_if:
1224 case Intrinsic::amdgcn_else: {
1225 ArrayRef<unsigned> Indices = ExtValue->
getIndices();
1226 return Indices.
size() == 1 && Indices[0] == 1;
1243 case Intrinsic::amdgcn_is_shared:
1244 case Intrinsic::amdgcn_is_private:
1245 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1246 case Intrinsic::amdgcn_flat_atomic_fmin_num:
1247 case Intrinsic::amdgcn_load_to_lds:
1248 case Intrinsic::amdgcn_make_buffer_rsrc:
1258 Value *NewV)
const {
1259 auto IntrID =
II->getIntrinsicID();
1261 case Intrinsic::amdgcn_is_shared:
1262 case Intrinsic::amdgcn_is_private: {
1263 unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
1271 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1272 case Intrinsic::amdgcn_flat_atomic_fmin_num: {
1273 Type *DestTy =
II->getType();
1280 M,
II->getIntrinsicID(), {DestTy, SrcTy, DestTy});
1281 II->setArgOperand(0, NewV);
1282 II->setCalledFunction(NewDecl);
1285 case Intrinsic::amdgcn_load_to_lds: {
1290 II->setArgOperand(0, NewV);
1291 II->setCalledFunction(NewDecl);
1294 case Intrinsic::amdgcn_make_buffer_rsrc: {
1296 Type *DstTy =
II->getType();
1299 M,
II->getIntrinsicID(), {DstTy, SrcTy});
1300 II->setArgOperand(0, NewV);
1301 II->setCalledFunction(NewDecl);
1322 unsigned ScalarSize =
DL.getTypeSizeInBits(SrcTy->getElementType());
1324 (ScalarSize == 16 || ScalarSize == 8)) {
1337 unsigned NumSrcElts = SrcVecTy->getNumElements();
1338 if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumSrcElts == 2 &&
1344 unsigned EltsPerReg = 32 / ScalarSize;
1352 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);
1355 if (Index % EltsPerReg == 0)
1358 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);
1364 unsigned NumDstElts = DstVecTy->getNumElements();
1366 unsigned EndIndex = Index + NumInsertElts;
1367 unsigned BeginSubIdx = Index % EltsPerReg;
1368 unsigned EndSubIdx = EndIndex % EltsPerReg;
1371 if (BeginSubIdx != 0) {
1379 if (EndIndex < NumDstElts && BeginSubIdx < EndSubIdx)
1388 unsigned NumElts = DstVecTy->getNumElements();
1392 unsigned EltsFromLHS = NumElts - Index;
1393 bool LHSIsAligned = (Index % EltsPerReg) == 0;
1394 bool RHSIsAligned = (EltsFromLHS % EltsPerReg) == 0;
1395 if (LHSIsAligned && RHSIsAligned)
1397 if (LHSIsAligned && !RHSIsAligned)
1398 return divideCeil(NumElts, EltsPerReg) - (EltsFromLHS / EltsPerReg);
1399 if (!LHSIsAligned && RHSIsAligned)
1407 if (!Mask.empty()) {
1417 for (
unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx += EltsPerReg) {
1420 for (
unsigned I = 0;
I < EltsPerReg && DstIdx +
I < Mask.size(); ++
I) {
1421 int SrcIdx = Mask[DstIdx +
I];
1425 if (SrcIdx < (
int)NumSrcElts) {
1426 Reg = SrcIdx / EltsPerReg;
1427 if (SrcIdx % EltsPerReg !=
I)
1430 Reg = NumSrcElts + (SrcIdx - NumSrcElts) / EltsPerReg;
1431 if ((SrcIdx - NumSrcElts) % EltsPerReg !=
I)
1437 if (Regs.
size() >= 2)
1457 for (
auto &
Op :
I->operands()) {
1470 if (OpInst->getType()->isVectorTy() && OpInst->getNumOperands() > 1) {
1472 if (VecOpInst && VecOpInst->
hasOneUse())
1477 OpInst->getOperand(0),
1478 OpInst->getOperand(1)) == 0) {
1487 unsigned EltSize =
DL.getTypeSizeInBits(
1492 if (EltSize < 16 || !ST->has16BitInsts())
1495 int NumSubElts, SubIndex;
1496 if (Shuffle->changesLength()) {
1497 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) {
1502 if ((Shuffle->isExtractSubvectorMask(SubIndex) ||
1503 Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) &&
1504 !(SubIndex & 0x1)) {
1510 if (Shuffle->isReverse() || Shuffle->isZeroEltSplat() ||
1511 Shuffle->isSingleSource()) {
1518 return !
Ops.empty();
1529 const FeatureBitset &CallerBits = CallerST->getFeatureBits();
1530 const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
1532 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
1533 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
1534 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
1544 if (Callee->hasFnAttribute(Attribute::AlwaysInline) ||
1545 Callee->hasFnAttribute(Attribute::InlineHint))
1551 if (Callee->size() == 1)
1553 size_t BBSize = Caller->size() + Callee->size() - 1;
1563 const int NrOfSGPRUntilSpill = 26;
1564 const int NrOfVGPRUntilSpill = 32;
1568 unsigned adjustThreshold = 0;
1574 for (
auto ArgVT : ValueVTs) {
1578 SGPRsInUse += CCRegNum;
1580 VGPRsInUse += CCRegNum;
1590 ArgStackCost +=
const_cast<GCNTTIImpl *
>(TTIImpl)->getMemoryOpCost(
1593 ArgStackCost +=
const_cast<GCNTTIImpl *
>(TTIImpl)->getMemoryOpCost(
1599 adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
1601 adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
1603 return adjustThreshold;
1612 unsigned AllocaSize = 0;
1619 unsigned AddrSpace = Ty->getAddressSpace();
1629 AllocaSize +=
Size->getFixedValue();
1673 static_assert(InlinerVectorBonusPercent == 0,
"vector bonus assumed to be 0");
1677 return BB.getTerminator()->getNumSuccessors() > 1;
1680 Threshold += Threshold / 2;
1688 unsigned AllocaThresholdBonus =
1689 (Threshold * ArgAllocaSize->getFixedValue()) / AllocaSize;
1691 return AllocaThresholdBonus;
1697 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
1702 CommonTTI.getPeelingPreferences(L, SE, PP);
1706 return getQuarterRateInstrCost(
CostKind);
1710 return ST->hasFullRate64Ops()
1711 ? getFullRateInstrCost()
1712 : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(
CostKind)
1713 : getQuarterRateInstrCost(
CostKind);
1716std::pair<InstructionCost, MVT>
1717GCNTTIImpl::getTypeLegalizationCost(
Type *Ty)
const {
1719 auto Size =
DL.getTypeSizeInBits(Ty);
1731 return ST->hasPrefetch() ? 128 : 0;
1742 LB.
push_back({
"amdgpu-max-num-workgroups[0]", MaxNumWorkgroups[0]});
1743 LB.push_back({
"amdgpu-max-num-workgroups[1]", MaxNumWorkgroups[1]});
1744 LB.push_back({
"amdgpu-max-num-workgroups[2]", MaxNumWorkgroups[2]});
1745 std::pair<unsigned, unsigned> FlatWorkGroupSize =
1746 ST->getFlatWorkGroupSizes(
F);
1747 LB.push_back({
"amdgpu-flat-work-group-size[0]", FlatWorkGroupSize.first});
1748 LB.push_back({
"amdgpu-flat-work-group-size[1]", FlatWorkGroupSize.second});
1749 std::pair<unsigned, unsigned> WavesPerEU = ST->getWavesPerEU(
F);
1750 LB.push_back({
"amdgpu-waves-per-eu[0]", WavesPerEU.first});
1751 LB.push_back({
"amdgpu-waves-per-eu[1]", WavesPerEU.second});
1756 if (!ST->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1763 Attribute IEEEAttr =
F->getFnAttribute(
"amdgpu-ieee");
1778 if ((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1779 VecTy->getElementType()->isIntegerTy(8)) {
1790 if (VecTy->getElementType()->isIntegerTy(8)) {
1801 case Intrinsic::amdgcn_wave_shuffle:
1808 if (isAlwaysUniform(V))
1811 if (isSourceOfDivergence(V))
1819 bool HasBaseReg, int64_t Scale,
1820 unsigned AddrSpace)
const {
1821 if (HasBaseReg && Scale != 0) {
1825 if (getST()->hasScaleOffset() && Ty && Ty->isSized() &&
1845 unsigned EffInsnsA =
A.Insns +
A.ScaleCost;
1846 unsigned EffInsnsB =
B.Insns +
B.ScaleCost;
1848 return std::tie(EffInsnsA,
A.NumIVMuls,
A.AddRecCost,
A.NumBaseAdds,
1849 A.SetupCost,
A.ImmCost,
A.NumRegs) <
1850 std::tie(EffInsnsB,
B.NumIVMuls,
B.AddRecCost,
B.NumBaseAdds,
1851 B.SetupCost,
B.ImmCost,
B.NumRegs);
1868 case Intrinsic::amdgcn_wave_shuffle:
1871 return UniformArgs[0] || UniformArgs[1];
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static unsigned getNumElements(Type *Ty)
This file implements the SmallBitVector class.
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
unsigned getNumberOfParts(Type *Tp) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned getArgOperandNo(const Use *U) const
Given a use for a arg operand, get the arg operand number that corresponds to it.
This class represents a function call, abstracting a target machine's calling convention.
Conditional Branch instruction.
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Account for loads of i8 vector types to have reduced cost.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
bool isUniform(const Instruction *I, const SmallBitVector &UniformArgs) const override
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
unsigned getNumberOfRegisters(unsigned RCID) const override
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLSRCostLess(const TTI::LSRCost &A, const TTI::LSRCost &B) const override
bool shouldPrefetchAddressSpace(unsigned AS) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool hasBranchDivergence(const Function *F=nullptr) const override
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
unsigned getInliningThresholdMultiplier() const override
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
unsigned getPrefetchDistance() const override
How much before a load we should place the prefetch instruction.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
unsigned adjustInliningThreshold(const CallBase *CB) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool shouldDropLSRSolutionIfLessProfitable() const override
int getInliningLastCallToStaticBonus() const override
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
ValueUniformity getValueUniformity(const Value *V) const override
unsigned getNumberOfParts(Type *Tp) const override
When counting parts on AMD GPUs, account for i8s being grouped together under a single i32 value.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
unsigned getMinVectorRegisterBitWidth() const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override
bool isNumRegsMajorCostOfLSR() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const override
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool hasApproxFunc() const LLVM_READONLY
Determine whether the approximate-math-functions flag is set.
LLVM_ABI bool hasAllowContract() const LLVM_READONLY
Determine whether the allow-contract flag is set.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
FastMathFlags getFlags() const
Type * getReturnType() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
A Module instance is used to store all the information related to an LLVM module.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::vector< AsmOperandInfo > AsmOperandInfoVector
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
bool isFlatGlobalAddrSpace(unsigned AS)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isExtendedGlobalAddrSpace(unsigned AS)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ FADD
Simple binary floating point operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ AND
Bitwise operators - logical and, logical or, logical xor.
LLVM_ABI int getInstrCost()
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
@ Custom
The result value requires a custom uniformity check.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static constexpr DenormalMode getPreserveSign()
uint64_t getScalarSizeInBits() const
Information about a load/store intrinsic defined by the target.
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const