#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;
  Type *RetTy = nullptr;

  bool TypeBasedOnly = false);

class TargetTransformInfo {
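  // A pass typically obtains this interface through the analysis manager; a
  // minimal sketch (the pass name is hypothetical, TargetIRAnalysis is
  // declared later in this header):
  //
  //   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
  //     auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  //     ...
  //   }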
  static_assert(sizeof(PointersChainInfo) == 4,
                "Was size increase justified?");

  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const PointersChainInfo &Info, Type *AccessTy,
                       TargetCostKind CostKind = TCK_RecipThroughput) const;
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const;

  KnownBits &Known, bool &KnownBitsComputed) const;

  SimplifyAndSetOp) const;
  bool HasBaseReg, int64_t Scale,
  int64_t ScalableOffset = 0) const;
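  // The trailing parameters above belong to isLegalAddressingMode: the queried
  // addressing mode has the form BaseGV + BaseReg + BaseOffset + Scale*ScaleReg,
  // and a target might, for example, accept only Scale values of 1, 2, 4, or 8.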
  int64_t BaseOffset, bool HasBaseReg,
  unsigned AddrSpace = 0) const;
  const APInt &DemandedElts, bool Insert, bool Extract,

  bool IsZeroCmp) const;
  unsigned *Fast = nullptr) const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;

  Type *ScalarValTy) const;
  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  unsigned NumStridedMemAccesses, unsigned NumPrefetches,
  bool HasCall) const;
  unsigned Opcode, Type *Ty,
  ArrayRef<const Value *> Args = std::nullopt,
  const Instruction *CxtI = nullptr,
  const TargetLibraryInfo *TLibInfo = nullptr) const;
  VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
  const SmallBitVector &OpcodeMask,

  VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
  const Instruction *CxtI = nullptr) const;
  unsigned Index) const;

  Value *Op1 = nullptr) const;

  unsigned Index = -1) const;

  const APInt &DemandedDstElts,

  const Instruction *I = nullptr) const;
  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
  static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
    return FMF && !(*FMF).allowReassoc();
  }
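  // An "ordered" reduction must combine elements strictly in sequence, as
  // required for floating-point operations that may not be reassociated;
  // targets generally cost these higher than tree-wise (unordered) reductions.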
  unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,

  const SCEV *Ptr = nullptr) const;

  Type *ExpectedType) const;
  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicElementSize = std::nullopt) const;

  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicCpySize = std::nullopt) const;

  unsigned DefaultCallPenalty) const;
  unsigned AddrSpace) const;
  unsigned AddrSpace) const;

  unsigned ChainSizeInBytes,
  unsigned ChainSizeInBytes,

  Align Alignment) const;
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
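  // TTIImpl type-erases the target's implementation: Concept is an abstract
  // interface mirroring every TTI hook, and Model<T> (defined below) holds an
  // arbitrary implementation object by value and forwards each virtual call to
  // it, so this class exposes a uniform, non-template API.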
  virtual std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const = 0;
  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const = 0;

  KnownBits &Known, bool &KnownBitsComputed) = 0;

  SimplifyAndSetOp) = 0;
  int64_t BaseOffset, bool HasBaseReg,
  int64_t Scale, unsigned AddrSpace,
  int64_t ScalableOffset) = 0;

  Align Alignment) = 0;
  Align Alignment) = 0;

  bool HasBaseReg, int64_t Scale,
  unsigned AddrSpace) = 0;

  const APInt &DemandedElts, bool Insert, bool Extract,

  unsigned *Fast) = 0;
  Type *Ty = nullptr) const = 0;

  bool IsScalable) const = 0;
  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;

  Type *ScalarValTy) const = 0;
  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;

  unsigned NumStridedMemAccesses, unsigned NumPrefetches,
  bool HasCall) const = 0;

  VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
  unsigned Index) = 0;
  unsigned Index) = 0;

  const APInt &DemandedDstElts,

  bool VariableMask, Align Alignment,
  bool VariableMask, Align Alignment,

  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
  std::optional<FastMathFlags> FMF,

  Type *ExpectedType) = 0;

  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicElementSize) const = 0;

  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicCpySize) const = 0;

  unsigned DefaultCallPenalty) const = 0;
  unsigned AddrSpace) const = 0;
  unsigned AddrSpace) const = 0;

  unsigned ChainSizeInBytes,
  unsigned ChainSizeInBytes,

  Align Alignment) const = 0;
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }
  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType,
             TargetCostKind CostKind) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
  }
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TargetCostKind CostKind) override {
    return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
  }
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
  }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return Impl.getCallerAllocaCost(CB, AI);
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
  }
  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TargetCostKind CostKind) override {
    return Impl.getInstructionCost(U, Operands, CostKind);
  }
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  bool hasBranchDivergence(const Function *F = nullptr) override {
    return Impl.hasBranchDivergence(F);
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return Impl.addrspacesMayAlias(AS0, AS1);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
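  // The TailFoldingStyle returned above selects how the vectorizer folds a
  // loop's scalar tail into the main body: DataWithoutLaneMask computes the
  // predicate without the get.active.lane.mask intrinsic, DataAndControlFlow
  // predicates both data and control flow,
  // DataAndControlFlowWithoutRuntimeCheck additionally adjusts the trip count
  // to avoid a runtime overflow check, and DataWithEVL uses predicated
  // explicit-vector-length instructions.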
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                 KnownBitsComputed);
  }
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalAddScalableImmediate(int64_t Imm) override {
    return Impl.isLegalAddScalableImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I, int64_t ScalableOffset) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I, ScalableOffset);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool shouldFoldTerminatingConditionAfterLSR() const override {
    return Impl.shouldFoldTerminatingConditionAfterLSR();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  AddressingModeKind
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedCompressStore(DataType, Alignment);
  }
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
  }
  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalStridedLoadStore(DataType, Alignment);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                     AddrSpace);
  }
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TargetCostKind CostKind) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                         CostKind);
  }
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  }
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
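  // The returned MemCmpExpansionOptions describe whether and how memcmp calls
  // may be expanded inline for this target (for instance, which load sizes are
  // supported and how many loads are allowed; exact fields vary across LLVM
  // versions). IsZeroCmp selects the cheaper equality-only comparison.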
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
    return Impl.shouldTreatInstructionLikeSelect(I);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) override {
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const override {
    return Impl.preferToKeepConstantsAttached(Inst, Fn);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(RegisterKind K) const override {
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  std::optional<unsigned> getMinPageSize() const override {
    return Impl.getMinPageSize();
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TargetCostKind CostKind,
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                       Args, CxtI);
  }
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TargetCostKind CostKind) const override {
    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
  }
  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, TargetCostKind CostKind,
                                 int Index, VectorType *SubTp,
                                 ArrayRef<const Value *> Args,
                                 const Instruction *CxtI) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
                               CxtI);
  }
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   CastContextHint CCH,
                                   TargetCostKind CostKind,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  InstructionCost getCFInstrCost(unsigned Opcode, TargetCostKind CostKind,
                                 const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TargetCostKind CostKind,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TargetCostKind CostKind,
                                     unsigned Index) override {
    return Impl.getVectorInstrCost(I, Val, CostKind, Index);
  }
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TargetCostKind CostKind) override {
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                          DemandedDstElts, CostKind);
  }
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TargetCostKind CostKind,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
  }
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TargetCostKind CostKind,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, I);
  }
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TargetCostKind CostKind) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
  }
  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
  }
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TargetCostKind CostKind) override {
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  }
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, FastMathFlags FMF,
                           TargetCostKind CostKind) override {
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
                                         CostKind);
  }
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TargetCostKind CostKind) override {
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TargetCostKind CostKind) override {
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
                                          AtomicElementSize);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override {
    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }
};
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
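// An illustrative sketch (not part of this header): a backend's TargetMachine
// hook typically wraps its target-specific TTI implementation object in this
// constructor; "MyTargetTTIImpl" and "MyTargetMachine" are hypothetical names.
//
//   TargetTransformInfo
//   MyTargetMachine::getTargetTransformInfo(const Function &F) const {
//     return TargetTransformInfo(MyTargetTTIImpl(this, F));
//   }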
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }
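  // An illustrative sketch (assumed setup, not from this header): the analysis
  // is normally registered with a callback that builds a per-function TTI from
  // the TargetMachine, here a hypothetical "TM":
  //
  //   FAM.registerPass([=] {
  //     return TargetIRAnalysis(
  //         [TM](const Function &F) { return TM->getTargetTransformInfo(F); });
  //   });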
  std::optional<TargetTransformInfo> TTI;

  virtual void anchor();
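  // The out-of-line virtual anchor() pins this class's vtable to a single
  // translation unit instead of emitting it in every file that includes the
  // header, a common LLVM idiom.

#endif // LLVM_ANALYSIS_TARGETTRANSFORMINFO_H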