29#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "AMDGPUtti"
39 "amdgpu-unroll-threshold-private",
40 cl::desc(
"Unroll threshold for AMDGPU if private memory used in a loop"),
44 "amdgpu-unroll-threshold-local",
45 cl::desc(
"Unroll threshold for AMDGPU if local memory used in a loop"),
49 "amdgpu-unroll-threshold-if",
50 cl::desc(
"Unroll threshold increment for AMDGPU for each if statement inside loop"),
54 "amdgpu-unroll-runtime-local",
55 cl::desc(
"Allow runtime unroll for AMDGPU if local memory used in a loop"),
59 "amdgpu-unroll-max-block-to-analyze",
60 cl::desc(
"Inner loop block size threshold to analyze in unroll for AMDGPU"),
65 cl::desc(
"Cost of alloca argument"));
73 cl::desc(
"Maximum alloca size to use for inline cost"));
78 cl::desc(
"Maximum number of BBs allowed in a function after inlining"
79 " (compile time constraint)"));
83 "amdgpu-memcpy-loop-unroll",
84 cl::desc(
"Unroll factor (affecting 4x32-bit operations) to use for memory "
85 "operations when lowering statically-sized memcpy, memmove, or"
97 for (
const Value *V :
I->operand_values()) {
100 return SubLoop->contains(PHI); }))
110 TargetTriple(TM->getTargetTriple()),
112 TLI(ST->getTargetLowering()) {}
117 const Function &
F = *L->getHeader()->getParent();
119 F.getFnAttributeAsParsedInteger(
"amdgpu-unroll-threshold", 300);
120 UP.
MaxCount = std::numeric_limits<unsigned>::max();
135 const unsigned MaxAlloca = (256 - 16) * 4;
141 if (
MDNode *LoopUnrollThreshold =
143 if (LoopUnrollThreshold->getNumOperands() == 2) {
145 LoopUnrollThreshold->getOperand(1));
146 if (MetaThresholdValue) {
152 ThresholdPrivate = std::min(ThresholdPrivate, UP.
Threshold);
153 ThresholdLocal = std::min(ThresholdLocal, UP.
Threshold);
158 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
161 unsigned LocalGEPsSeen = 0;
164 return SubLoop->contains(BB); }))
177 if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
178 (L->contains(Succ1) && L->isLoopExiting(Succ1)))
184 << *L <<
" due to " << *Br <<
'\n');
196 unsigned AS =
GEP->getAddressSpace();
197 unsigned Threshold = 0;
199 Threshold = ThresholdPrivate;
201 Threshold = ThresholdLocal;
209 const Value *Ptr =
GEP->getPointerOperand();
215 if (!AllocaSize || AllocaSize->getFixedValue() > MaxAlloca)
224 if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
229 << *L <<
" due to LDS use.\n");
234 bool HasLoopDef =
false;
237 if (!Inst || L->isLoopInvariant(
Op))
241 return SubLoop->contains(Inst); }))
265 << *L <<
" due to " << *
GEP <<
'\n');
288 AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,
289 AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureUseFlatForGlobal,
290 AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode,
292 AMDGPU::FeatureAutoWaitcntBeforeBarrier,
295 AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK,
296 AMDGPU::FeatureTrapHandler,
300 AMDGPU::FeatureSRAMECC,
303 AMDGPU::FeatureFastFMAF32, AMDGPU::FeatureHalfRate64Ops};
308 TLI(ST->getTargetLowering()), CommonTTI(TM,
F),
309 IsGraphics(
AMDGPU::isGraphics(
F.getCallingConv())) {
312 HasFP64FP16Denormals =
317 return !
F || !ST->isSingleLaneExecution(*
F);
339 : ST->hasPackedFP32Ops() ? 64
352 if (Opcode == Instruction::Load || Opcode == Instruction::Store)
353 return 32 * 4 / ElemWidth;
356 return (ElemWidth == 8 && ST->has16BitInsts()) ? 4
357 : (ElemWidth == 16 && ST->has16BitInsts()) ? 2
358 : (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
359 : (ElemWidth == 64 &&
360 (ST->hasPackedFP64Ops() || ST->hasPackedU64Ops()))
371 return !ST->hasGFX940Insts() && !ST->hasGFX950Insts();
375 unsigned ChainSizeInBytes,
377 unsigned VecRegBitWidth = VF * LoadSize;
380 return 128 / LoadSize;
386 unsigned ChainSizeInBytes,
388 unsigned VecRegBitWidth = VF * StoreSize;
389 if (VecRegBitWidth > 128)
390 return 128 / StoreSize;
406 return 8 * ST->getMaxPrivateElementSize();
414 unsigned AddrSpace)
const {
419 return (Alignment >= 4 || ST->hasUnalignedScratchAccessEnabled()) &&
420 ChainSizeInBytes <= ST->getMaxPrivateElementSize();
427 unsigned AddrSpace)
const {
433 unsigned AddrSpace)
const {
443 unsigned DestAddrSpace,
Align SrcAlign,
Align DestAlign,
444 std::optional<uint32_t> AtomicElementSize)
const {
446 if (AtomicElementSize)
460 unsigned I32EltsInVector = 4;
470 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
472 std::optional<uint32_t> AtomicCpySize)
const {
476 OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
477 DestAlign, AtomicCpySize);
480 while (RemainingBytes >= 16) {
482 RemainingBytes -= 16;
486 while (RemainingBytes >= 8) {
492 while (RemainingBytes >= 4) {
498 while (RemainingBytes >= 2) {
504 while (RemainingBytes) {
522 case Intrinsic::amdgcn_ds_ordered_add:
523 case Intrinsic::amdgcn_ds_ordered_swap: {
526 if (!Ordering || !Volatile)
529 unsigned OrderingVal = Ordering->getZExtValue();
536 Info.WriteMem =
true;
537 Info.IsVolatile = !Volatile->isZero();
551 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
552 int ISD = TLI->InstructionOpcodeToISD(Opcode);
556 unsigned NElts = LT.second.isVector() ?
557 LT.second.getVectorNumElements() : 1;
566 return get64BitInstrCost(
CostKind) * LT.first * NElts;
568 if (ST->has16BitInsts() && SLT == MVT::i16)
569 NElts = (NElts + 1) / 2;
572 return getFullRateInstrCost() * LT.first * NElts;
575 if (SLT == MVT::i64 && ST->hasPackedU64Ops())
576 NElts = (NElts + 1) / 2;
581 if (SLT == MVT::i64) {
583 return 2 * getFullRateInstrCost() * LT.first * NElts;
586 if (ST->has16BitInsts() && SLT == MVT::i16)
587 NElts = (NElts + 1) / 2;
589 return LT.first * NElts * getFullRateInstrCost();
591 const int QuarterRateCost = getQuarterRateInstrCost(
CostKind);
592 if (SLT == MVT::i64) {
593 const int FullRateCost = getFullRateInstrCost();
594 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
597 if (ST->has16BitInsts() && SLT == MVT::i16)
598 NElts = (NElts + 1) / 2;
601 return QuarterRateCost * NElts * LT.first;
609 const int OPC = TLI->InstructionOpcodeToISD(
FAdd->getOpcode());
611 if (ST->hasMadMacF32Insts() && SLT == MVT::f32 && !HasFP32Denormals)
613 if (ST->has16BitInsts() && SLT == MVT::f16 && !HasFP64FP16Denormals)
626 if (ST->hasPackedFP32Ops() && SLT == MVT::f32)
627 NElts = (NElts + 1) / 2;
628 if (ST->hasBF16PackedInsts() && SLT == MVT::bf16)
629 NElts = (NElts + 1) / 2;
630 if (SLT == MVT::f64) {
631 if (ST->hasPackedFP64Ops())
632 NElts = (NElts + 1) / 2;
633 return LT.first * NElts * get64BitInstrCost(
CostKind);
636 if (ST->has16BitInsts() && SLT == MVT::f16)
637 NElts = (NElts + 1) / 2;
639 if (SLT == MVT::f32 || SLT == MVT::f16 || SLT == MVT::bf16)
640 return LT.first * NElts * getFullRateInstrCost();
646 if (SLT == MVT::f64) {
651 if (!ST->hasUsableDivScaleConditionOutput())
652 Cost += 3 * getFullRateInstrCost();
654 return LT.first *
Cost * NElts;
659 if ((SLT == MVT::f32 && !HasFP32Denormals) ||
660 (SLT == MVT::f16 && ST->has16BitInsts())) {
661 return LT.first * getTransInstrCost(
CostKind) * NElts;
665 if (SLT == MVT::f16 && ST->has16BitInsts()) {
671 int Cost = 4 * getFullRateInstrCost() + 2 * getTransInstrCost(
CostKind);
672 return LT.first *
Cost * NElts;
679 int Cost = getTransInstrCost(
CostKind) + getFullRateInstrCost();
680 return LT.first *
Cost * NElts;
683 if (SLT == MVT::f32 || SLT == MVT::f16) {
685 int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
688 if (!HasFP32Denormals) {
690 Cost += 2 * getFullRateInstrCost();
693 return LT.first * NElts *
Cost;
699 return TLI->isFNegFree(SLT) ? 0 : NElts;
713 case Intrinsic::fmuladd:
714 case Intrinsic::copysign:
715 case Intrinsic::minimumnum:
716 case Intrinsic::maximumnum:
717 case Intrinsic::canonicalize:
719 case Intrinsic::round:
720 case Intrinsic::uadd_sat:
721 case Intrinsic::usub_sat:
722 case Intrinsic::sadd_sat:
723 case Intrinsic::ssub_sat:
734 switch (ICA.
getID()) {
735 case Intrinsic::fabs:
738 case Intrinsic::amdgcn_workitem_id_x:
739 case Intrinsic::amdgcn_workitem_id_y:
740 case Intrinsic::amdgcn_workitem_id_z:
744 case Intrinsic::amdgcn_workgroup_id_x:
745 case Intrinsic::amdgcn_workgroup_id_y:
746 case Intrinsic::amdgcn_workgroup_id_z:
747 case Intrinsic::amdgcn_lds_kernel_id:
748 case Intrinsic::amdgcn_dispatch_ptr:
749 case Intrinsic::amdgcn_dispatch_id:
750 case Intrinsic::amdgcn_implicitarg_ptr:
751 case Intrinsic::amdgcn_queue_ptr:
763 case Intrinsic::exp2:
764 case Intrinsic::exp10: {
766 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
769 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
771 if (SLT == MVT::f64) {
773 if (IID == Intrinsic::exp)
775 else if (IID == Intrinsic::exp10)
781 if (SLT == MVT::f32) {
782 unsigned NumFullRateOps = 0;
784 unsigned NumTransOps = 1;
790 NumFullRateOps = ST->hasFastFMAF32() ? 13 : 17;
792 if (IID == Intrinsic::exp) {
795 }
else if (IID == Intrinsic::exp10) {
801 if (HasFP32Denormals)
806 NumTransOps * getTransInstrCost(
CostKind);
807 return LT.first * NElts *
Cost;
813 case Intrinsic::log2:
814 case Intrinsic::log10: {
815 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
818 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
820 if (SLT == MVT::f32) {
821 unsigned NumFullRateOps = 0;
823 if (IID == Intrinsic::log2) {
831 NumFullRateOps = ST->hasFastFMAF32() ? 8 : 11;
834 if (HasFP32Denormals)
838 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
839 return LT.first * NElts *
Cost;
845 case Intrinsic::cos: {
846 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
849 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
851 if (SLT == MVT::f32) {
853 unsigned NumFullRateOps = ST->hasTrigReducedRange() ? 2 : 1;
856 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
857 return LT.first * NElts *
Cost;
862 case Intrinsic::sqrt: {
863 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
866 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
868 if (SLT == MVT::f32) {
869 unsigned NumFullRateOps = 0;
873 NumFullRateOps = HasFP32Denormals ? 17 : 16;
877 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
878 return LT.first * NElts *
Cost;
890 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
892 unsigned NElts = LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
894 if ((ST->hasVOP3PInsts() &&
895 (SLT == MVT::f16 || SLT == MVT::i16 ||
896 (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) ||
897 (ST->hasPackedFP32Ops() && SLT == MVT::f32) ||
898 (ST->hasPackedFP64Ops() && SLT == MVT::f64) ||
899 (ST->hasPackedU64Ops() && SLT == MVT::i64))
900 NElts = (NElts + 1) / 2;
903 unsigned InstRate = getQuarterRateInstrCost(
CostKind);
905 switch (ICA.
getID()) {
907 case Intrinsic::fmuladd:
908 if (SLT == MVT::f64) {
909 InstRate = get64BitInstrCost(
CostKind);
913 if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
914 InstRate = getFullRateInstrCost();
916 InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(
CostKind)
917 : getQuarterRateInstrCost(
CostKind);
920 case Intrinsic::copysign:
921 return NElts * getFullRateInstrCost();
922 case Intrinsic::minimumnum:
923 case Intrinsic::maximumnum: {
935 SLT == MVT::f64 ? get64BitInstrCost(
CostKind) : getFullRateInstrCost();
936 InstRate = BaseRate *
NumOps;
939 case Intrinsic::canonicalize: {
941 SLT == MVT::f64 ? get64BitInstrCost(
CostKind) : getFullRateInstrCost();
944 case Intrinsic::uadd_sat:
945 case Intrinsic::usub_sat:
946 case Intrinsic::sadd_sat:
947 case Intrinsic::ssub_sat: {
948 if (SLT == MVT::i16 || SLT == MVT::i32)
949 InstRate = getFullRateInstrCost();
951 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
958 if (SLT == MVT::i16 || SLT == MVT::i32)
959 InstRate = 2 * getFullRateInstrCost();
965 return LT.first * NElts * InstRate;
971 assert((
I ==
nullptr ||
I->getOpcode() == Opcode) &&
972 "Opcode should reflect passed instruction.");
975 const int CBrCost = SCost ? 5 : 7;
977 case Instruction::UncondBr:
979 return SCost ? 1 : 4;
980 case Instruction::CondBr:
984 case Instruction::Switch: {
988 return (
SI ? (
SI->getNumCases() + 1) : 4) * (CBrCost + 1);
990 case Instruction::Ret:
991 return SCost ? 1 : 10;
998 std::optional<FastMathFlags> FMF,
1003 EVT OrigTy = TLI->getValueType(
DL, Ty);
1010 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1011 return LT.first * getFullRateInstrCost();
1018 EVT OrigTy = TLI->getValueType(
DL, Ty);
1025 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1026 return LT.first * getHalfRateInstrCost(
CostKind);
1033 case Instruction::ExtractElement:
1034 case Instruction::InsertElement: {
1041 if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
1052 return Index % 4 == 0 ? 0 : 1;
1077 if (Indices.
size() > 1)
1083 TLI->ParseConstraints(
DL, ST->getRegisterInfo(), *CI);
1085 const int TargetOutputIdx = Indices.
empty() ? -1 : Indices[0];
1088 for (
auto &TC : TargetConstraints) {
1093 if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)
1096 TLI->ComputeConstraintToUse(TC,
SDValue());
1099 TRI, TC.ConstraintCode, TC.ConstraintVT).second;
1103 if (!RC || !
TRI->isSGPRClass(RC))
1133bool GCNTTIImpl::isSourceOfDivergence(
const Value *V)
const {
1157 case Intrinsic::read_register:
1159 case Intrinsic::amdgcn_addrspacecast_nonnull: {
1161 Intrinsic->getOperand(0)->getType()->getPointerAddressSpace();
1162 unsigned DstAS =
Intrinsic->getType()->getPointerAddressSpace();
1165 ST->hasGloballyAddressableScratch();
1167 case Intrinsic::amdgcn_workitem_id_y:
1168 case Intrinsic::amdgcn_workitem_id_z: {
1173 *
F, IID == Intrinsic::amdgcn_workitem_id_y ? 1 : 2);
1174 return !HasUniformYZ && (!ThisDimSize || *ThisDimSize != 1);
1183 if (CI->isInlineAsm())
1198 ST->hasGloballyAddressableScratch();
1204bool GCNTTIImpl::isAlwaysUniform(
const Value *V)
const {
1209 if (CI->isInlineAsm())
1227 bool XDimDoesntResetWithinWaves =
false;
1230 XDimDoesntResetWithinWaves = ST->hasWavefrontsEvenlySplittingXDim(*
F);
1232 using namespace llvm::PatternMatch;
1238 return C >= ST->getWavefrontSizeLog2() && XDimDoesntResetWithinWaves;
1245 ST->getWavefrontSizeLog2() &&
1246 XDimDoesntResetWithinWaves;
1261 case Intrinsic::amdgcn_if:
1262 case Intrinsic::amdgcn_else: {
1263 ArrayRef<unsigned> Indices = ExtValue->
getIndices();
1264 return Indices.
size() == 1 && Indices[0] == 1;
1281 case Intrinsic::amdgcn_is_shared:
1282 case Intrinsic::amdgcn_is_private:
1283 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1284 case Intrinsic::amdgcn_flat_atomic_fmin_num:
1285 case Intrinsic::amdgcn_load_to_lds:
1286 case Intrinsic::amdgcn_make_buffer_rsrc:
1296 Value *NewV)
const {
1297 auto IntrID =
II->getIntrinsicID();
1299 case Intrinsic::amdgcn_is_shared:
1300 case Intrinsic::amdgcn_is_private: {
1301 unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
1309 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1310 case Intrinsic::amdgcn_flat_atomic_fmin_num: {
1311 Type *DestTy =
II->getType();
1318 M,
II->getIntrinsicID(), {DestTy, SrcTy, DestTy});
1319 II->setArgOperand(0, NewV);
1320 II->setCalledFunction(NewDecl);
1323 case Intrinsic::amdgcn_load_to_lds: {
1328 II->setArgOperand(0, NewV);
1329 II->setCalledFunction(NewDecl);
1332 case Intrinsic::amdgcn_make_buffer_rsrc: {
1334 Type *DstTy =
II->getType();
1337 M,
II->getIntrinsicID(), {DstTy, SrcTy});
1338 II->setArgOperand(0, NewV);
1339 II->setCalledFunction(NewDecl);
1360 unsigned ScalarSize =
DL.getTypeSizeInBits(SrcTy->getElementType());
1362 (ScalarSize == 16 || ScalarSize == 8)) {
1375 unsigned NumSrcElts = SrcVecTy->getNumElements();
1376 if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumSrcElts == 2 &&
1382 unsigned EltsPerReg = 32 / ScalarSize;
1390 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);
1393 if (Index % EltsPerReg == 0)
1396 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);
1402 unsigned NumDstElts = DstVecTy->getNumElements();
1404 unsigned EndIndex = Index + NumInsertElts;
1405 unsigned BeginSubIdx = Index % EltsPerReg;
1406 unsigned EndSubIdx = EndIndex % EltsPerReg;
1409 if (BeginSubIdx != 0) {
1417 if (EndIndex < NumDstElts && BeginSubIdx < EndSubIdx)
1426 unsigned NumElts = DstVecTy->getNumElements();
1430 unsigned EltsFromLHS = NumElts - Index;
1431 bool LHSIsAligned = (Index % EltsPerReg) == 0;
1432 bool RHSIsAligned = (EltsFromLHS % EltsPerReg) == 0;
1433 if (LHSIsAligned && RHSIsAligned)
1435 if (LHSIsAligned && !RHSIsAligned)
1436 return divideCeil(NumElts, EltsPerReg) - (EltsFromLHS / EltsPerReg);
1437 if (!LHSIsAligned && RHSIsAligned)
1445 if (!Mask.empty()) {
1455 for (
unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx += EltsPerReg) {
1458 for (
unsigned I = 0;
I < EltsPerReg && DstIdx +
I < Mask.size(); ++
I) {
1459 int SrcIdx = Mask[DstIdx +
I];
1463 if (SrcIdx < (
int)NumSrcElts) {
1464 Reg = SrcIdx / EltsPerReg;
1465 if (SrcIdx % EltsPerReg !=
I)
1468 Reg = NumSrcElts + (SrcIdx - NumSrcElts) / EltsPerReg;
1469 if ((SrcIdx - NumSrcElts) % EltsPerReg !=
I)
1475 if (Regs.
size() >= 2)
1495 for (
auto &
Op :
I->operands()) {
1508 if (OpInst->getType()->isVectorTy() && OpInst->getNumOperands() > 1) {
1510 if (VecOpInst && VecOpInst->
hasOneUse())
1515 OpInst->getOperand(0),
1516 OpInst->getOperand(1)) == 0) {
1525 unsigned EltSize =
DL.getTypeSizeInBits(
1530 if (EltSize < 16 || !ST->has16BitInsts())
1533 int NumSubElts, SubIndex;
1534 if (Shuffle->changesLength()) {
1535 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) {
1540 if ((Shuffle->isExtractSubvectorMask(SubIndex) ||
1541 Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) &&
1542 !(SubIndex & 0x1)) {
1548 if (Shuffle->isReverse() || Shuffle->isZeroEltSplat() ||
1549 Shuffle->isSingleSource()) {
1556 return !
Ops.empty();
1567 const FeatureBitset &CallerBits = CallerST->getFeatureBits();
1568 const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
1570 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
1571 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
1572 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
1582 if (Callee->hasFnAttribute(Attribute::AlwaysInline) ||
1583 Callee->hasFnAttribute(Attribute::InlineHint))
1589 if (Callee->size() == 1)
1591 size_t BBSize = Caller->size() + Callee->size() - 1;
1601 const int NrOfSGPRUntilSpill = 26;
1602 const int NrOfVGPRUntilSpill = 32;
1606 unsigned adjustThreshold = 0;
1612 for (
auto ArgVT : ValueVTs) {
1616 SGPRsInUse += CCRegNum;
1618 VGPRsInUse += CCRegNum;
1628 ArgStackCost +=
const_cast<GCNTTIImpl *
>(TTIImpl)->getMemoryOpCost(
1631 ArgStackCost +=
const_cast<GCNTTIImpl *
>(TTIImpl)->getMemoryOpCost(
1637 adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
1639 adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
1641 return adjustThreshold;
1650 unsigned AllocaSize = 0;
1657 unsigned AddrSpace = Ty->getAddressSpace();
1667 AllocaSize +=
Size->getFixedValue();
1711 static_assert(InlinerVectorBonusPercent == 0,
"vector bonus assumed to be 0");
1715 return BB.getTerminator()->getNumSuccessors() > 1;
1718 Threshold += Threshold / 2;
1726 unsigned AllocaThresholdBonus =
1727 (Threshold * ArgAllocaSize->getFixedValue()) / AllocaSize;
1729 return AllocaThresholdBonus;
1735 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
1740 CommonTTI.getPeelingPreferences(L, SE, PP);
1744 return getQuarterRateInstrCost(
CostKind);
1748 return ST->hasFullRate64Ops()
1749 ? getFullRateInstrCost()
1750 : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(
CostKind)
1751 : getQuarterRateInstrCost(
CostKind);
1754std::pair<InstructionCost, MVT>
1755GCNTTIImpl::getTypeLegalizationCost(
Type *Ty)
const {
1757 auto Size =
DL.getTypeSizeInBits(Ty);
1769 return ST->hasPrefetch() ? 128 : 0;
1780 LB.
push_back({
"amdgpu-max-num-workgroups[0]", MaxNumWorkgroups[0]});
1781 LB.push_back({
"amdgpu-max-num-workgroups[1]", MaxNumWorkgroups[1]});
1782 LB.push_back({
"amdgpu-max-num-workgroups[2]", MaxNumWorkgroups[2]});
1783 std::pair<unsigned, unsigned> FlatWorkGroupSize =
1784 ST->getFlatWorkGroupSizes(
F);
1785 LB.push_back({
"amdgpu-flat-work-group-size[0]", FlatWorkGroupSize.first});
1786 LB.push_back({
"amdgpu-flat-work-group-size[1]", FlatWorkGroupSize.second});
1787 std::pair<unsigned, unsigned> WavesPerEU = ST->getWavesPerEU(
F);
1788 LB.push_back({
"amdgpu-waves-per-eu[0]", WavesPerEU.first});
1789 LB.push_back({
"amdgpu-waves-per-eu[1]", WavesPerEU.second});
1794 if (!ST->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1801 Attribute IEEEAttr =
F->getFnAttribute(
"amdgpu-ieee");
1816 if ((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1818 VecTy->getElementType()->isIntegerTy(8)) {
1829 if (VecTy->getElementType()->isIntegerTy(8)) {
1840 case Intrinsic::amdgcn_wave_shuffle:
1847 if (isAlwaysUniform(V))
1850 if (isSourceOfDivergence(V))
1858 bool HasBaseReg, int64_t Scale,
1859 unsigned AddrSpace)
const {
1860 if (HasBaseReg && Scale != 0) {
1864 if (getST()->hasScaleOffset() && Ty && Ty->isSized() &&
1884 unsigned EffInsnsA =
A.Insns +
A.ScaleCost;
1885 unsigned EffInsnsB =
B.Insns +
B.ScaleCost;
1887 return std::tie(EffInsnsA,
A.NumIVMuls,
A.AddRecCost,
A.NumBaseAdds,
1888 A.SetupCost,
A.ImmCost,
A.NumRegs) <
1889 std::tie(EffInsnsB,
B.NumIVMuls,
B.AddRecCost,
B.NumBaseAdds,
1890 B.SetupCost,
B.ImmCost,
B.NumRegs);
1907 case Intrinsic::amdgcn_wave_shuffle:
1910 return UniformArgs[0] || UniformArgs[1];
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static unsigned getNumElements(Type *Ty)
This file implements the SmallBitVector class.
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
This class represents an incoming formal argument to a Function.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
unsigned getNumberOfParts(Type *Tp) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned getArgOperandNo(const Use *U) const
Given a use for a arg operand, get the arg operand number that corresponds to it.
This class represents a function call, abstracting a target machine's calling convention.
Conditional Branch instruction.
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Account for loads of i8 vector types to have reduced cost.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
bool isUniform(const Instruction *I, const SmallBitVector &UniformArgs) const override
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
unsigned getNumberOfRegisters(unsigned RCID) const override
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLSRCostLess(const TTI::LSRCost &A, const TTI::LSRCost &B) const override
bool shouldPrefetchAddressSpace(unsigned AS) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool hasBranchDivergence(const Function *F=nullptr) const override
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
unsigned getInliningThresholdMultiplier() const override
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
unsigned getPrefetchDistance() const override
How much before a load we should place the prefetch instruction.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
unsigned adjustInliningThreshold(const CallBase *CB) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool shouldDropLSRSolutionIfLessProfitable() const override
int getInliningLastCallToStaticBonus() const override
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
ValueUniformity getValueUniformity(const Value *V) const override
unsigned getNumberOfParts(Type *Tp) const override
When counting parts on AMD GPUs, account for i8s being grouped together under a single i32 value.
bool preferSLPInstCountCheck() const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
unsigned getMinVectorRegisterBitWidth() const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override
bool isNumRegsMajorCostOfLSR() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const override
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool hasApproxFunc() const LLVM_READONLY
Determine whether the approximate-math-functions flag is set.
LLVM_ABI bool hasAllowContract() const LLVM_READONLY
Determine whether the allow-contract flag is set.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
FastMathFlags getFlags() const
Type * getReturnType() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
A Module instance is used to store all the information related to an LLVM module.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::vector< AsmOperandInfo > AsmOperandInfoVector
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
bool isFlatGlobalAddrSpace(unsigned AS)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isExtendedGlobalAddrSpace(unsigned AS)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ FADD
Simple binary floating point operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ AND
Bitwise operators - logical and, logical or, logical xor.
LLVM_ABI int getInstrCost()
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
@ Custom
The result value requires a custom uniformity check.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static constexpr DenormalMode getPreserveSign()
uint64_t getScalarSizeInBits() const
Information about a load/store intrinsic defined by the target.
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const