#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;

Type *RetTy = nullptr;
bool TypeBasedOnly = false);
class TargetTransformInfo;
static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
const PointersChainInfo &Info, Type *AccessTy,
std::pair<const Value *, unsigned>
KnownBits &Known, bool &KnownBitsComputed) const;
SimplifyAndSetOp) const;
bool HasBaseReg, int64_t Scale,
int64_t ScalableOffset = 0) const;
unsigned AddrSpace = 0) const;
const APInt &DemandedElts, bool Insert, bool Extract,
bool IsZeroCmp) const;
unsigned *Fast = nullptr) const;
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
Type *ScalarValTy) const;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const;
unsigned Opcode, Type *Ty,
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr,
const TargetLibraryInfo *TLibInfo = nullptr) const;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
const SmallBitVector &OpcodeMask,
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const;
unsigned Index) const;
Value *Op1 = nullptr) const;
unsigned Index = -1) const;
const APInt &DemandedDstElts,
const Instruction *I = nullptr) const;
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
return FMF && !(*FMF).allowReassoc();
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
const SCEV *Ptr = nullptr) const;
Type *ExpectedType) const;
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
unsigned DefaultCallPenalty) const;
unsigned AddrSpace) const;
unsigned AddrSpace) const;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const;
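/// Example (illustrative sketch, not part of this header): a new-pass-manager
/// function pass can obtain a TargetTransformInfo through TargetIRAnalysis and
/// query the hooks declared above. The pass name CostDumpPass and the chosen
/// cost kind are hypothetical, and exact method signatures vary between LLVM
/// releases.
/// \code
///   #include "llvm/Analysis/TargetTransformInfo.h"
///   #include "llvm/IR/InstIterator.h"
///   #include "llvm/IR/PassManager.h"
///   using namespace llvm;
///
///   struct CostDumpPass : PassInfoMixin<CostDumpPass> {
///     PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
///       const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
///       for (Instruction &I : instructions(F)) {
///         InstructionCost Cost = TTI.getInstructionCost(
///             &I, TargetTransformInfo::TCK_RecipThroughput);
///         (void)Cost; // e.g. emit a remark or a statistic here
///       }
///       return PreservedAnalyses::all();
///     }
///   };
/// \endcode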
template <typename T> class Model;

std::unique_ptr<Concept> TTIImpl;
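/// The Concept/Model pair above is ordinary C++ type erasure: Concept is an
/// abstract interface, Model<T> forwards every virtual call to a concrete
/// target implementation T, and TTIImpl owns the erased object. A minimal,
/// self-contained sketch of the same pattern follows; the names and the single
/// getRegisterWidth() hook are illustrative, not LLVM's.
/// \code
///   #include <memory>
///   #include <utility>
///
///   class AnyCostModel {
///     struct Concept {
///       virtual ~Concept() = default;
///       virtual unsigned getRegisterWidth() const = 0;
///     };
///     template <typename T> struct Model final : Concept {
///       T Impl;
///       explicit Model(T Impl) : Impl(std::move(Impl)) {}
///       unsigned getRegisterWidth() const override {
///         return Impl.getRegisterWidth();
///       }
///     };
///     std::unique_ptr<Concept> Impl;
///
///   public:
///     template <typename T>
///     AnyCostModel(T Impl) : Impl(new Model<T>(std::move(Impl))) {}
///     unsigned getRegisterWidth() const { return Impl->getRegisterWidth(); }
///   };
/// \endcode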
virtual std::pair<const Value *, unsigned>
Value *NewV) const = 0;
KnownBits &Known, bool &KnownBitsComputed) = 0;
SimplifyAndSetOp) = 0;
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,
int64_t ScalableOffset) = 0;
Align Alignment) = 0;
Align Alignment) = 0;
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) = 0;
const APInt &DemandedElts, bool Insert, bool Extract,
unsigned *Fast) = 0;
Type *Ty = nullptr) const = 0;
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
Type *ScalarValTy) const = 0;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const = 0;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
unsigned Index) = 0;
unsigned Index) = 0;
const APInt &DemandedDstElts,
bool VariableMask, Align Alignment,
bool VariableMask, Align Alignment,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
std::optional<FastMathFlags> FMF,
Type *ExpectedType) = 0;
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
std::optional<uint32_t> AtomicCpySize) const = 0;
unsigned DefaultCallPenalty) const = 0;
unsigned AddrSpace) const = 0;
unsigned AddrSpace) const = 0;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const = 0;
template <typename T>
Model(T Impl) : Impl(std::move(Impl)) {}
~Model() override = default;
const DataLayout &getDataLayout() const override {
  return Impl.getDataLayout();
getGEPCost(Type *PointeeType, const Value *Ptr,
           ArrayRef<const Value *> Operands, Type *AccessType,
InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                     const PointersChainInfo &Info,
unsigned getInliningThresholdMultiplier() const override {
  return Impl.getInliningThresholdMultiplier();
unsigned adjustInliningThreshold(const CallBase *CB) override {
  return Impl.adjustInliningThreshold(CB);
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
  return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
  return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
int getInlinerVectorBonusPercent() const override {
  return Impl.getInlinerVectorBonusPercent();
unsigned getCallerAllocaCost(const CallBase *CB,
                             const AllocaInst *AI) const override {
  return Impl.getCallerAllocaCost(CB, AI);
InstructionCost getMemcpyCost(const Instruction *I) override {
  return Impl.getMemcpyCost(I);
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
  return Impl.getMaxMemIntrinsicInlineSizeThreshold();
InstructionCost getInstructionCost(const User *U,
BranchProbability getPredictableBranchThreshold() override {
  return Impl.getPredictableBranchThreshold();
InstructionCost getBranchMispredictPenalty() override {
  return Impl.getBranchMispredictPenalty();
bool hasBranchDivergence(const Function *F = nullptr) override {
  return Impl.hasBranchDivergence(F);
bool isSourceOfDivergence(const Value *V) override {
  return Impl.isSourceOfDivergence(V);
bool isAlwaysUniform(const Value *V) override {
  return Impl.isAlwaysUniform(V);
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
  return Impl.isValidAddrSpaceCast(FromAS, ToAS);
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
  return Impl.addrspacesMayAlias(AS0, AS1);
unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
  return Impl.collectFlatAddressOperands(OpIndexes, IID);
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
unsigned getAssumedAddrSpace(const Value *V) const override {
  return Impl.getAssumedAddrSpace(V);
bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override {
  return Impl.getPredicatedAddrSpace(V);
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                        Value *NewV) const override {
  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
bool isLoweredToCall(const Function *F) override {
  return Impl.isLoweredToCall(F);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                             UnrollingPreferences &UP,
                             OptimizationRemarkEmitter *ORE) override {
  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                           PeelingPreferences &PP) override {
  return Impl.getPeelingPreferences(L, SE, PP);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                              AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                              HardwareLoopInfo &HWLoopInfo) override {
  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
  return Impl.preferPredicateOverEpilogue(TFI);
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
  return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
  return Impl.instCombineIntrinsic(IC, II);
std::optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                 APInt DemandedMask, KnownBits &Known,
                                 bool &KnownBitsComputed) override {
  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) override {
  return Impl.simplifyDemandedVectorEltsIntrinsic(
      IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
bool isLegalAddImmediate(int64_t Imm) override {
  return Impl.isLegalAddImmediate(Imm);
bool isLegalAddScalableImmediate(int64_t Imm) override {
  return Impl.isLegalAddScalableImmediate(Imm);
bool isLegalICmpImmediate(int64_t Imm) override {
  return Impl.isLegalICmpImmediate(Imm);
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                           Instruction *I, int64_t ScalableOffset) override {
  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                    AddrSpace, I, ScalableOffset);
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                   const TargetTransformInfo::LSRCost &C2) override {
  return Impl.isLSRCostLess(C1, C2);
bool isNumRegsMajorCostOfLSR() override {
  return Impl.isNumRegsMajorCostOfLSR();
bool shouldFoldTerminatingConditionAfterLSR() const override {
  return Impl.shouldFoldTerminatingConditionAfterLSR();
bool shouldDropLSRSolutionIfLessProfitable() const override {
  return Impl.shouldDropLSRSolutionIfLessProfitable();
bool isProfitableLSRChainElement(Instruction *I) override {
  return Impl.isProfitableLSRChainElement(I);
bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                DominatorTree *DT, AssumptionCache *AC,
                TargetLibraryInfo *LibInfo) override {
  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
getPreferredAddressingMode(const Loop *L,
                           ScalarEvolution *SE) const override {
  return Impl.getPreferredAddressingMode(L, SE);
bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedStore(DataType, Alignment);
bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedLoad(DataType, Alignment);
bool isLegalNTStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalNTStore(DataType, Alignment);
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalNTLoad(DataType, Alignment);
bool isLegalBroadcastLoad(Type *ElementTy,
                          ElementCount NumElements) const override {
  return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedScatter(DataType, Alignment);
bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedGather(DataType, Alignment);
bool forceScalarizeMaskedGather(VectorType *DataType,
                                Align Alignment) override {
  return Impl.forceScalarizeMaskedGather(DataType, Alignment);
bool forceScalarizeMaskedScatter(VectorType *DataType,
                                 Align Alignment) override {
  return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedCompressStore(DataType, Alignment);
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalStridedLoadStore(DataType, Alignment);
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
  return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                     const SmallBitVector &OpcodeMask) const override {
  return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
bool enableOrderedReductions() override {
  return Impl.enableOrderedReductions();
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
  return Impl.hasDivRemOp(DataType, IsSigned);
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
  return Impl.hasVolatileVariant(I, AddrSpace);
bool prefersVectorizedAddressing() override {
  return Impl.prefersVectorizedAddressing();
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                     StackOffset BaseOffset, bool HasBaseReg,
                                     unsigned AddrSpace) override {
  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
  return Impl.isTruncateFree(Ty1, Ty2);
bool isProfitableToHoist(Instruction *I) override {
  return Impl.isProfitableToHoist(I);
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
unsigned getRegUsageForType(Type *Ty) override {
  return Impl.getRegUsageForType(Ty);
bool shouldBuildLookupTables() override {
  return Impl.shouldBuildLookupTables();
bool shouldBuildLookupTablesForConstant(Constant *C) override {
  return Impl.shouldBuildLookupTablesForConstant(C);
bool shouldBuildRelLookupTables() override {
  return Impl.shouldBuildRelLookupTables();
bool useColdCCForColdCall(Function &F) override {
  return Impl.useColdCCForColdCall(F);
InstructionCost getScalarizationOverhead(VectorType *Ty,
                                         const APInt &DemandedElts,
                                         bool Insert, bool Extract,
  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 ArrayRef<Type *> Tys,
  return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
bool supportsEfficientVectorElementLoadStore() override {
  return Impl.supportsEfficientVectorElementLoadStore();
bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
bool supportsTailCallFor(const CallBase *CB) override {
  return Impl.supportsTailCallFor(CB);
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
  return Impl.enableAggressiveInterleaving(LoopHasReductions);
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                             bool IsZeroCmp) const override {
  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
bool enableSelectOptimize() override {
  return Impl.enableSelectOptimize();
bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
  return Impl.shouldTreatInstructionLikeSelect(I);
bool enableInterleavedAccessVectorization() override {
  return Impl.enableInterleavedAccessVectorization();
bool enableMaskedInterleavedAccessVectorization() override {
  return Impl.enableMaskedInterleavedAccessVectorization();
bool isFPVectorizationPotentiallyUnsafe() override {
  return Impl.isFPVectorizationPotentiallyUnsafe();
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                    unsigned *Fast) override {
  return Impl.getPopcntSupport(IntTyWidthInBit);
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
  return Impl.isExpensiveToSpeculativelyExecute(I);
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
InstructionCost getFPOpCost(Type *Ty) override {
  return Impl.getFPOpCost(Ty);
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                      const APInt &Imm, Type *Ty) override {
  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
  return Impl.getIntImmCost(Imm, Ty, CostKind);
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  Instruction *Inst = nullptr) override {
  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
const APInt &Imm, Type *Ty,
  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
bool preferToKeepConstantsAttached(const Instruction &Inst,
                                   const Function &Fn) const override {
  return Impl.preferToKeepConstantsAttached(Inst, Fn);
unsigned getNumberOfRegisters(unsigned ClassID) const override {
  return Impl.getNumberOfRegisters(ClassID);
bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
  return Impl.hasConditionalLoadStoreForType(Ty);
unsigned getRegisterClassForType(bool Vector,
                                 Type *Ty = nullptr) const override {
  return Impl.getRegisterClassForType(Vector, Ty);
const char *getRegisterClassName(unsigned ClassID) const override {
  return Impl.getRegisterClassName(ClassID);
TypeSize getRegisterBitWidth(RegisterKind K) const override {
  return Impl.getRegisterBitWidth(K);
unsigned getMinVectorRegisterBitWidth() const override {
  return Impl.getMinVectorRegisterBitWidth();
std::optional<unsigned> getMaxVScale() const override {
  return Impl.getMaxVScale();
std::optional<unsigned> getVScaleForTuning() const override {
  return Impl.getVScaleForTuning();
bool isVScaleKnownToBeAPowerOfTwo() const override {
  return Impl.isVScaleKnownToBeAPowerOfTwo();
bool shouldMaximizeVectorBandwidth(
  return Impl.shouldMaximizeVectorBandwidth(K);
ElementCount getMinimumVF(unsigned ElemWidth,
                          bool IsScalable) const override {
  return Impl.getMinimumVF(ElemWidth, IsScalable);
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
  return Impl.getMaximumVF(ElemWidth, Opcode);
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                           Type *ScalarValTy) const override {
  return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
bool shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
  return Impl.shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
  return Impl.getCacheSize(Level);
std::optional<unsigned>
getCacheAssociativity(CacheLevel Level) const override {
  return Impl.getCacheAssociativity(Level);
std::optional<unsigned> getMinPageSize() const override {
  return Impl.getMinPageSize();
unsigned getPrefetchDistance() const override {
  return Impl.getPrefetchDistance();
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                              unsigned NumStridedMemAccesses,
                              unsigned NumPrefetches,
                              bool HasCall) const override {
  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                   NumPrefetches, HasCall);
unsigned getMaxPrefetchIterationsAhead() const override {
  return Impl.getMaxPrefetchIterationsAhead();
bool enableWritePrefetching() const override {
  return Impl.enableWritePrefetching();
bool shouldPrefetchAddressSpace(unsigned AS) const override {
  return Impl.shouldPrefetchAddressSpace(AS);
unsigned getMaxInterleaveFactor(ElementCount VF) override {
  return Impl.getMaxInterleaveFactor(VF);
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                          ProfileSummaryInfo *PSI,
                                          BlockFrequencyInfo *BFI) override {
  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
InstructionCost getArithmeticInstrCost(
    OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI = nullptr) override {
  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                const SmallBitVector &OpcodeMask,
  return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
ArrayRef<const Value *> Args,
const Instruction *CxtI) override {
  return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) override {
  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                         unsigned Index) override {
  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
const Instruction *I = nullptr) override {
  return Impl.getCFInstrCost(Opcode, CostKind, I);
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   const Instruction *I) override {
  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                   unsigned Index, Value *Op0,
                                   Value *Op1) override {
  return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                   unsigned Index) override {
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                          const APInt &DemandedDstElts,
  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                OperandValueInfo OpInfo,
                                const Instruction *I) override {
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  const Instruction *I) override {
  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       const Instruction *I = nullptr) override {
  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       const Instruction *I = nullptr) override {
  return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
InstructionCost getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    bool UseMaskForCond, bool UseMaskForGaps) override {
  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                         UseMaskForCond, UseMaskForGaps);
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                           std::optional<FastMathFlags> FMF,
  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
  return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
  return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  return Impl.getIntrinsicInstrCost(ICA, CostKind);
InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                 ArrayRef<Type *> Tys,
unsigned getNumberOfParts(Type *Tp) override {
  return Impl.getNumberOfParts(Tp);
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) override {
  return Impl.getAddressComputationCost(Ty, SE, Ptr);
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
  return Impl.getCostOfKeepingLiveOverCall(Tys);
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                        MemIntrinsicInfo &Info) override {
  return Impl.getTgtMemIntrinsic(Inst, Info);
unsigned getAtomicMemIntrinsicMaxElementSize() const override {
  return Impl.getAtomicMemIntrinsicMaxElementSize();
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                         Type *ExpectedType) override {
  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
Type *getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicElementSize) const override {
  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                        DestAddrSpace, SrcAlign, DestAlign,
void getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicCpySize) const override {
  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                         SrcAddrSpace, DestAddrSpace,
                                         SrcAlign, DestAlign, AtomicCpySize);
const Function *Callee) const override {
  return Impl.areInlineCompatible(Caller, Callee);
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                              unsigned DefaultCallPenalty) const override {
  return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                           const ArrayRef<Type *> &Types) const override {
  return Impl.areTypesABICompatible(Caller, Callee, Types);
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
bool isLegalToVectorizeLoad(LoadInst *LI) const override {
  return Impl.isLegalToVectorizeLoad(LI);
bool isLegalToVectorizeStore(StoreInst *SI) const override {
  return Impl.isLegalToVectorizeStore(SI);
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                 unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                 ElementCount VF) const override {
  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
  return Impl.isElementTypeLegalForScalableVector(Ty);
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                             unsigned ChainSizeInBytes,
  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                              unsigned ChainSizeInBytes,
  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
bool preferFixedOverScalableIfEqualCost() const override {
  return Impl.preferFixedOverScalableIfEqualCost();
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                           ReductionFlags Flags) const override {
  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                     ReductionFlags Flags) const override {
  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
bool preferEpilogueVectorization() const override {
  return Impl.preferEpilogueVectorization();
bool shouldExpandReduction(const IntrinsicInst *II) const override {
  return Impl.shouldExpandReduction(II);
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
  return Impl.getPreferredExpandedReductionShuffle(II);
unsigned getGISelRematGlobalCost() const override {
  return Impl.getGISelRematGlobalCost();
unsigned getMinTripCountTailFoldingThreshold() const override {
  return Impl.getMinTripCountTailFoldingThreshold();
bool supportsScalableVectors() const override {
  return Impl.supportsScalableVectors();
bool enableScalableVectorization() const override {
  return Impl.enableScalableVectorization();
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                           Align Alignment) const override {
  return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
  return Impl.getVPLegalizationStrategy(PI);
bool hasArmWideBranch(bool Thumb) const override {
  return Impl.hasArmWideBranch(Thumb);
unsigned getMaxNumArgs() const override {
  return Impl.getMaxNumArgs();

template <typename T>
    : TTIImpl(new Model<T>(Impl)) {}
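/// This templated constructor is what wires a concrete implementation into the
/// type-erased interface: any object providing the expected hooks can be
/// wrapped in a Model and owned through TTIImpl. An illustrative sketch of how
/// a TargetTransformInfo typically reaches a pass pipeline; TM is assumed to
/// be a TargetMachine pointer, F an existing Function, and the surrounding
/// setup is omitted.
/// \code
///   FunctionAnalysisManager FAM;
///   FAM.registerPass([&] {
///     // Targets provide their own analysis; fall back to the default one.
///     return TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis();
///   });
///   // Later, inside the pipeline:
///   //   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
/// \endcode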
: TTICallback(Arg.TTICallback) {}
: TTICallback(std::move(Arg.TTICallback)) {}
TTICallback = RHS.TTICallback;
TTICallback = std::move(RHS.TTICallback);

std::optional<TargetTransformInfo> TTI;

virtual void anchor();
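/// For the legacy pass manager, the same analysis is exposed through
/// TargetTransformInfoWrapperPass (created with
/// createTargetTransformInfoWrapperPass). A sketch of how a legacy pass would
/// query it; MyLegacyPass is hypothetical and the usual pass boilerplate
/// (registration, getAnalysisUsage requiring the wrapper pass) is assumed.
/// \code
///   bool MyLegacyPass::runOnFunction(Function &F) {
///     TargetTransformInfo &TTI =
///         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
///     bool WideVectors =
///         TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
///             .getFixedValue() >= 128;
///     (void)WideVectors; // drive a transformation decision here
///     return false;      // no IR was modified
///   }
/// \endcode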