#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;

  Type *RetTy = nullptr;
      bool TypeBasedOnly = false);

class TargetTransformInfo;
  static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");

      const PointersChainInfo &Info, Type *AccessTy,
  std::pair<const Value *, unsigned>
      KnownBits &Known, bool &KnownBitsComputed) const;
      SimplifyAndSetOp) const;
      bool HasBaseReg, int64_t Scale,
      int64_t ScalableOffset = 0) const;
      unsigned AddrSpace = 0) const;
      const APInt &DemandedElts,
      bool Insert, bool Extract,
      bool IsZeroCmp) const;
      unsigned *Fast = nullptr) const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
      Type *ScalarValTy) const;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
      unsigned NumStridedMemAccesses,
      unsigned NumPrefetches, bool HasCall) const;
      unsigned Opcode, Type *Ty,
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr,
      const TargetLibraryInfo *TLibInfo = nullptr) const;
      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
      const SmallBitVector &OpcodeMask,
      VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr) const;
      unsigned Index) const;
      Value *Op1 = nullptr) const;
      unsigned Index = -1) const;
      const APInt &DemandedDstElts,
      const Instruction *I = nullptr) const;
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
    return FMF && !(*FMF).allowReassoc();
      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
      const SCEV *Ptr = nullptr) const;
      Type *ExpectedType) const;
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
      unsigned DefaultCallPenalty) const;
      unsigned AddrSpace) const;
      unsigned AddrSpace) const;
      unsigned ChainSizeInBytes,
      unsigned ChainSizeInBytes,
      Align Alignment) const;

  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
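The fragments above are the public cost-query surface of `TargetTransformInfo`, followed by its private `Model` template and the type-erased `TTIImpl` pointer. As a hedged illustration of how a new-pass-manager transform usually consumes this interface, here is a minimal sketch; `MyPass` and the cost threshold are hypothetical, and only the `TargetIRAnalysis` / `getInstructionCost` calls come from this header.

```cpp
// Minimal sketch (assumed usage): a function pass asking TTI for
// per-instruction throughput costs. MyPass and the threshold are illustrative.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct MyPass : PassInfoMixin<MyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    // TargetIRAnalysis::Result is a TargetTransformInfo configured for F's target.
    TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
    for (Instruction &I : instructions(F)) {
      InstructionCost Cost =
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
      // An invalid cost means the target could not model the instruction.
      if (Cost.isValid() && Cost > 4) {
        // A real pass would gate its transformation on the cost here.
      }
    }
    return PreservedAnalyses::all();
  }
};
```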
  virtual std::pair<const Value *, unsigned>
      Value *NewV) const = 0;
      KnownBits &Known, bool &KnownBitsComputed) = 0;
      SimplifyAndSetOp) = 0;
      int64_t BaseOffset, bool HasBaseReg,
      int64_t Scale, unsigned AddrSpace,
      int64_t ScalableOffset) = 0;
      Align Alignment) = 0;
      Align Alignment) = 0;
      bool HasBaseReg, int64_t Scale,
      unsigned AddrSpace) = 0;
      const APInt &DemandedElts,
      bool Insert, bool Extract,
      unsigned *Fast) = 0;
      Type *Ty = nullptr) const = 0;
      bool IsScalable) const = 0;
  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
      Type *ScalarValTy) const = 0;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
      unsigned NumStridedMemAccesses,
      unsigned NumPrefetches,
      bool HasCall) const = 0;
      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
      unsigned Index) = 0;
      unsigned Index) = 0;
      const APInt &DemandedDstElts,
      bool VariableMask, Align Alignment,
      bool VariableMask, Align Alignment,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
      std::optional<FastMathFlags> FMF,
      Type *ExpectedType) = 0;
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize) const = 0;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      std::optional<uint32_t> AtomicCpySize) const = 0;
      unsigned DefaultCallPenalty) const = 0;
      unsigned AddrSpace) const = 0;
      unsigned AddrSpace) const = 0;
      unsigned ChainSizeInBytes,
      unsigned ChainSizeInBytes,
      Align Alignment) const = 0;
template <typename T>
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType,
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const PointersChainInfo &Info,
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
  }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return Impl.getCallerAllocaCost(CB, AI);
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
  }
  InstructionCost getInstructionCost(const User *U,
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  InstructionCost getBranchMispredictPenalty() override {
    return Impl.getBranchMispredictPenalty();
  }
  bool hasBranchDivergence(const Function *F = nullptr) override {
    return Impl.hasBranchDivergence(F);
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return Impl.addrspacesMayAlias(AS0, AS1);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalAddScalableImmediate(int64_t Imm) override {
    return Impl.isLegalAddScalableImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I, int64_t ScalableOffset) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I, ScalableOffset);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool shouldDropLSRSolutionIfLessProfitable() const override {
    return Impl.shouldDropLSRSolutionIfLessProfitable();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedCompressStore(DataType, Alignment);
  }
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
  }
  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalStridedLoadStore(DataType, Alignment);
  }
  bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
    return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
    return Impl.shouldTreatInstructionLikeSelect(I);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned *Fast) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
      const APInt &Imm, Type *Ty,
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const override {
    return Impl.preferToKeepConstantsAttached(Inst, Fn);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
    return Impl.hasConditionalLoadStoreForType(Ty);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(
    return Impl.shouldMaximizeVectorBandwidth(K);
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  std::optional<unsigned> getMinPageSize() const override {
    return Impl.getMinPageSize();
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  const SmallBitVector &OpcodeMask,
    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
      ArrayRef<const Value *> Args,
      const Instruction *CxtI) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
      const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     unsigned Index) override {
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         const Instruction *I = nullptr) override {
    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           UseMaskForCond, UseMaskForGaps);
  }
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
      const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override {
    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  bool preferFixedOverScalableIfEqualCost() const override {
    return Impl.preferFixedOverScalableIfEqualCost();
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
    return Impl.getPreferredExpandedReductionShuffle(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }

template <typename T>
    : TTIImpl(new Model<T>(Impl)) {}
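The `Concept`/`Model` pair and the templated constructor above are a standard type-erasure idiom: `Concept` declares the abstract interface, `Model<T>` forwards every virtual call to a concrete target implementation `Impl`, and the templated `TargetTransformInfo` constructor wraps any such implementation behind the `TTIImpl` pointer. A stripped-down sketch of the same pattern, with an invented `AnyCostModel` class exposing a single method in place of the real, much larger interface:

```cpp
// Illustrative reduction of the Concept/Model type-erasure idiom used above.
// AnyCostModel and getFPOpCost stand in for TargetTransformInfo's interface;
// the structure, not the names, is the point.
#include <memory>
#include <utility>

class AnyCostModel {
  struct Concept {
    virtual ~Concept() = default;
    virtual unsigned getFPOpCost() const = 0;
  };
  template <typename T> struct Model final : Concept {
    T Impl;
    explicit Model(T Impl) : Impl(std::move(Impl)) {}
    // Every virtual call is forwarded verbatim to the wrapped implementation.
    unsigned getFPOpCost() const override { return Impl.getFPOpCost(); }
  };
  std::unique_ptr<Concept> TTIImpl;

public:
  // Any type with a matching getFPOpCost() can be wrapped, with no common base.
  template <typename T>
  AnyCostModel(T Impl) : TTIImpl(new Model<T>(std::move(Impl))) {}
  unsigned getFPOpCost() const { return TTIImpl->getFPOpCost(); }
};
```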
    : TTICallback(Arg.TTICallback) {}
    : TTICallback(std::move(Arg.TTICallback)) {}
  TTICallback = RHS.TTICallback;
  TTICallback = std::move(RHS.TTICallback);

  std::optional<TargetTransformInfo> TTI;

  virtual void anchor();
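The remaining fragments belong to `TargetIRAnalysis`, the analysis whose `TTICallback` builds a `TargetTransformInfo` per function, and to what appears to be the legacy `TargetTransformInfoWrapperPass`. A hedged sketch of driving the analysis by hand through a `FunctionAnalysisManager`; a real toolchain installs `TargetMachine::getTargetIRAnalysis()` rather than the default-constructed analysis used here, and the helper function name is illustrative:

```cpp
// Sketch (assumed usage): running TargetIRAnalysis outside a full pipeline.
// The default constructor falls back to the conservative base TTI.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

TypeSize vectorRegisterWidth(Function &F) {
  FunctionAnalysisManager FAM;
  // Register the analyses so getResult can run them on demand; the analysis
  // manager uses PassInstrumentationAnalysis to instrument analysis runs.
  FAM.registerPass([] { return PassInstrumentationAnalysis(); });
  FAM.registerPass([] { return TargetIRAnalysis(); });

  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  // Ask the target how wide its fixed-width vector registers are.
  return TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
}
```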