#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;
Type *RetTy = nullptr;
bool TypeBasedOnly = false);

class TargetTransformInfo;
static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");

const PointersChainInfo &Info, Type *AccessTy,

std::pair<const Value *, unsigned>
KnownBits &Known, bool &KnownBitsComputed) const;
SimplifyAndSetOp) const;
bool HasBaseReg, int64_t Scale,
int64_t ScalableOffset = 0) const;
unsigned AddrSpace = 0) const;
const APInt &DemandedElts, bool Insert, bool Extract,
bool IsZeroCmp) const;
unsigned *Fast = nullptr) const;
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
Type *ScalarValTy) const;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const;
unsigned Opcode, Type *Ty,
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr,
const TargetLibraryInfo *TLibInfo = nullptr) const;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
const SmallBitVector &OpcodeMask,
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const;
unsigned Index) const;
Value *Op1 = nullptr) const;
unsigned Index = -1) const;
const APInt &DemandedDstElts,
const Instruction *I = nullptr) const;
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
return FMF && !(*FMF).allowReassoc();
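// A minimal usage sketch (an illustration, not part of this header): the
// check above reports an ordered reduction when fast-math flags are present
// but reassociation is disallowed, so the cost model must price a strict,
// in-order sequence rather than a reassociable tree. Variable names below
// are hypothetical.
//
//   std::optional<FastMathFlags> FMF = Rdx->getFastMathFlags();
//   InstructionCost C = TTI.getArithmeticReductionCost(
//       Instruction::FAdd, VecTy, FMF, TTI::TCK_RecipThroughput);
//   // With !FMF->allowReassoc(), targets typically return a higher cost.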
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
const SCEV *Ptr = nullptr) const;
Type *ExpectedType) const;
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
unsigned DefaultCallPenalty) const;
unsigned AddrSpace) const;
unsigned AddrSpace) const;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const;
template <typename T> class Model;

std::unique_ptr<Concept> TTIImpl;
virtual std::pair<const Value *, unsigned>
Value *NewV) const = 0;
KnownBits &Known, bool &KnownBitsComputed) = 0;
SimplifyAndSetOp) = 0;
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,
int64_t ScalableOffset) = 0;
Align Alignment) = 0;
Align Alignment) = 0;
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) = 0;
const APInt &DemandedElts, bool Insert, bool Extract,
unsigned *Fast) = 0;
Type *Ty = nullptr) const = 0;
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
Type *ScalarValTy) const = 0;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
unsigned NumStridedMemAccesses, unsigned NumPrefetches,
bool HasCall) const = 0;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
unsigned Index) = 0;
unsigned Index) = 0;
const APInt &DemandedDstElts,
bool VariableMask, Align Alignment,
bool VariableMask, Align Alignment,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
std::optional<FastMathFlags> FMF,
Type *ExpectedType) = 0;
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicCpySize) const = 0;
unsigned DefaultCallPenalty) const = 0;
unsigned AddrSpace) const = 0;
unsigned AddrSpace) const = 0;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const = 0;
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType,
             TargetCostKind CostKind) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
  }
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TargetCostKind CostKind) override {
    return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
  }
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
  }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return Impl.getCallerAllocaCost(CB, AI);
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
  }
  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TargetCostKind CostKind) override {
    return Impl.getInstructionCost(U, Operands, CostKind);
  }
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  bool hasBranchDivergence(const Function *F = nullptr) override {
    return Impl.hasBranchDivergence(F);
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return Impl.addrspacesMayAlias(AS0, AS1);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                 KnownBitsComputed);
  }
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalAddScalableImmediate(int64_t Imm) override {
    return Impl.isLegalAddScalableImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I, int64_t ScalableOffset) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I, ScalableOffset);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool shouldFoldTerminatingConditionAfterLSR() const override {
    return Impl.shouldFoldTerminatingConditionAfterLSR();
  }
  bool shouldDropLSRSolutionIfLessProfitable() const override {
    return Impl.shouldDropLSRSolutionIfLessProfitable();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  AddressingModeKind
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedCompressStore(DataType, Alignment);
  }
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
  }
  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalStridedLoadStore(DataType, Alignment);
  }
  bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
    return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                     AddrSpace);
  }
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TargetCostKind CostKind) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                         CostKind);
  }
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  }
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
    return Impl.shouldTreatInstructionLikeSelect(I);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) override {
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const override {
    return Impl.preferToKeepConstantsAttached(Inst, Fn);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
    return Impl.hasConditionalLoadStoreForType(Ty);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(
      TargetTransformInfo::RegisterKind K) const override {
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  std::optional<unsigned> getMinPageSize() const override {
    return Impl.getMinPageSize();
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TargetCostKind CostKind,
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                       Args, CxtI);
  }
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TargetCostKind CostKind) const override {
    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
  }
  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, TargetCostKind CostKind,
                                 int Index, VectorType *SubTp,
                                 ArrayRef<const Value *> Args,
                                 const Instruction *CxtI) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
                               CxtI);
  }
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   CastContextHint CCH,
                                   TargetCostKind CostKind,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  InstructionCost getCFInstrCost(unsigned Opcode, TargetCostKind CostKind,
                                 const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TargetCostKind CostKind,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TargetCostKind CostKind,
                                     unsigned Index) override {
    return Impl.getVectorInstrCost(I, Val, CostKind, Index);
  }
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TargetCostKind CostKind) override {
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                          DemandedDstElts, CostKind);
  }
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TargetCostKind CostKind,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
  }
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TargetCostKind CostKind,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, I);
  }
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TargetCostKind CostKind) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
  }
  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
  }
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TargetCostKind CostKind) override {
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  }
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, FastMathFlags FMF,
                           TargetCostKind CostKind) override {
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
                                         CostKind);
  }
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TargetCostKind CostKind) override {
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TargetCostKind CostKind) override {
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
                                          AtomicElementSize);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override {
    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }
};
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
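// The constructor above is the heart of TTI's type erasure: any object that
// provides the expected query surface is wrapped in Model<T>, which forwards
// every virtual hook of Concept to the concrete implementation held by
// value. A self-contained sketch of the same idiom follows; the names
// (AnyCostModel, a getCacheLineSize-only interface) are illustrative
// assumptions, not LLVM's actual API.

#include <memory>
#include <utility>

class AnyCostModel {
  // Abstract interface: one pure-virtual hook per query.
  struct Concept {
    virtual ~Concept() = default;
    virtual unsigned getCacheLineSize() const = 0;
  };
  // Concrete wrapper: stores the implementation by value and forwards.
  template <typename T> struct Model final : Concept {
    T Impl;
    explicit Model(T Impl) : Impl(std::move(Impl)) {}
    unsigned getCacheLineSize() const override {
      return Impl.getCacheLineSize();
    }
  };
  std::unique_ptr<Concept> TTIImpl;

public:
  template <typename T>
  AnyCostModel(T Impl) : TTIImpl(new Model<T>(std::move(Impl))) {}
  unsigned getCacheLineSize() const { return TTIImpl->getCacheLineSize(); }
};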
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  std::optional<TargetTransformInfo> TTI;
  virtual void anchor();
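// A brief usage sketch (an assumption-based illustration, not code from this
// file): under the new pass manager, a function pass obtains the
// TargetTransformInfo produced by TargetIRAnalysis from its analysis manager
// and then queries target parameters through it. The pass name below is
// hypothetical.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/PassManager.h"

struct TTIQueryExamplePass : llvm::PassInfoMixin<TTIQueryExamplePass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
    const llvm::TargetTransformInfo &TTI =
        FAM.getResult<llvm::TargetIRAnalysis>(F);
    // Example queries: purely informational, so all analyses are preserved.
    (void)TTI.getCacheLineSize();
    (void)TTI.getRegisterBitWidth(
        llvm::TargetTransformInfo::RGK_FixedWidthVector);
    return llvm::PreservedAnalyses::all();
  }
};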