#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;
  Type *RetTy = nullptr;

  bool TypeBasedOnly = false);
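  /// Editor's illustrative sketch (not part of the original header): building
  /// an IntrinsicCostAttributes bundle and asking TTI to cost the intrinsic.
  /// `TTI`, `RetTy` and `ArgTys` are assumed to exist in the caller.
  /// \code
  ///   IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, RetTy, ArgTys);
  ///   InstructionCost Cost = TTI.getIntrinsicInstrCost(
  ///       Attrs, TargetTransformInfo::TCK_RecipThroughput);
  /// \endcode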
class TargetTransformInfo {
  static_assert(sizeof(PointersChainInfo) == 4,
                "Was size increase justified?");
                                        const PointersChainInfo &Info,
                                        Type *AccessTy,
  std::pair<const Value *, unsigned>
                                   KnownBits &Known,
                                   bool &KnownBitsComputed) const;
      SimplifyAndSetOp) const;
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       unsigned AddrSpace = 0) const;
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                               bool IsZeroCmp) const;
                                      unsigned *Fast = nullptr) const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
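  /// Editor's illustrative sketch: how a client such as the SLP vectorizer
  /// might consult this hook. `TTI` and `VF` are assumed to be in scope; the
  /// hook is understood to return 0 when the target imposes no limit.
  /// \code
  ///   if (unsigned MaxVF = TTI.getMaximumVF(/*ElemWidth=*/8, Instruction::Store))
  ///     VF = std::min(VF, MaxVF);
  /// \endcode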
                             Type *ScalarValTy) const;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const;
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) const;
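  /// Editor's illustrative sketch: querying the reciprocal-throughput cost of
  /// a scalar versus a vector add. `TTI`, `ScalarTy` and `VecTy` are assumed.
  /// \code
  ///   InstructionCost ScalarCost = TTI.getArithmeticInstrCost(
  ///       Instruction::Add, ScalarTy, TargetTransformInfo::TCK_RecipThroughput);
  ///   InstructionCost VectorCost = TTI.getArithmeticInstrCost(
  ///       Instruction::Add, VecTy, TargetTransformInfo::TCK_RecipThroughput);
  ///   bool PreferVector = VectorCost.isValid() && VectorCost <= ScalarCost;
  /// \endcode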
      ArrayRef<int> Mask = std::nullopt,
      ArrayRef<const Value *> Args = std::nullopt) const;
                                            unsigned Index) const;
                                     Value *Op1 = nullptr) const;
                                     unsigned Index = -1) const;
                            const APInt &DemandedDstElts,
                                  const Instruction *I = nullptr) const;
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
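  /// Editor's illustrative sketch: costing a masked gather before deciding to
  /// emit one. `TTI`, `DataTy` (a vector type) and `Ptr` are assumed.
  /// \code
  ///   InstructionCost GatherCost = TTI.getGatherScatterOpCost(
  ///       Instruction::Load, DataTy, Ptr, /*VariableMask=*/true, Align(4),
  ///       TargetTransformInfo::TCK_RecipThroughput);
  /// \endcode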
    return FMF && !(*FMF).allowReassoc();
      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
                                            const SCEV *Ptr = nullptr) const;
                                       Type *ExpectedType) const;
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
                                   unsigned AddrSpace) const;
                                    unsigned AddrSpace) const;
                              unsigned ChainSizeInBytes,
                               unsigned ChainSizeInBytes,
                             Align Alignment) const;
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
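  /// The Concept/Model pair declared above is classic C++ type erasure: the
  /// abstract Concept defines the interface and Model<T> forwards every call
  /// to a concrete target implementation T held by value. A minimal sketch of
  /// the same idiom with hypothetical names (not the actual TTI interface):
  /// \code
  ///   struct AreaConcept {
  ///     virtual ~AreaConcept() = default;
  ///     virtual double area() const = 0;
  ///   };
  ///   template <typename T> struct AreaModel : AreaConcept {
  ///     T Impl;
  ///     AreaModel(T Impl) : Impl(std::move(Impl)) {}
  ///     double area() const override { return Impl.area(); }
  ///   };
  ///   class Shape {
  ///     std::unique_ptr<AreaConcept> Ptr;
  ///   public:
  ///     template <typename U>
  ///     Shape(U Impl) : Ptr(new AreaModel<U>(std::move(Impl))) {}
  ///     double area() const { return Ptr->area(); }
  ///   };
  /// \endcode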
  virtual std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const = 0;
                                                  Value *NewV) const = 0;
                                           KnownBits &Known,
                                           bool &KnownBitsComputed) = 0;
      SimplifyAndSetOp) = 0;
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace,
                                   Align Alignment) = 0;
                                  Align Alignment) = 0;
                                               bool HasBaseReg, int64_t Scale,
                                               unsigned AddrSpace) = 0;
                                                   const APInt &DemandedElts,
                                                   bool Insert, bool Extract,
                                              unsigned *Fast) = 0;
                                           Type *Ty = nullptr) const = 0;
                                      bool IsScalable) const = 0;
  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
                                     Type *ScalarValTy) const = 0;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const = 0;
                                                 unsigned Index) = 0;
                                                 unsigned Index) = 0;
                            const APInt &DemandedDstElts,
                                         bool VariableMask, Align Alignment,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
                             std::optional<FastMathFlags> FMF,
                                                   Type *ExpectedType) = 0;
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const = 0;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const = 0;
                                            unsigned AddrSpace) const = 0;
                                             unsigned AddrSpace) const = 0;
                                       unsigned ChainSizeInBytes,
                                        unsigned ChainSizeInBytes,
                                     Align Alignment) const = 0;
template <typename T>
class TargetTransformInfo::Model : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType,
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const PointersChainInfo &Info,
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return Impl.getCallerAllocaCost(CB, AI);
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
  }
  InstructionCost getInstructionCost(const User *U,
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  bool hasBranchDivergence(const Function *F = nullptr) override {
    return Impl.hasBranchDivergence(F);
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return Impl.addrspacesMayAlias(AS0, AS1);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                 KnownBitsComputed);
  }
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType) override {
    return Impl.isLegalMaskedCompressStore(DataType);
  }
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                     AddrSpace);
  }
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TargetCostKind CostKind) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                         CostKind);
  }
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  }
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) override {
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(
      TargetTransformInfo::RegisterKind K) const override {
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TargetCostKind CostKind,
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                       Args, CxtI);
  }
                                 ArrayRef<const Value *> Args) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
  }
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   CastContextHint CCH,
                                   TargetCostKind CostKind,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                            VectorType *VecTy,
                                            unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
                                 const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TargetCostKind CostKind,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TargetCostKind CostKind, unsigned Index,
                                     Value *Op0, Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TargetCostKind CostKind,
                                     unsigned Index) override {
    return Impl.getVectorInstrCost(I, Val, CostKind, Index);
  }
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TargetCostKind CostKind) override {
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                          DemandedDstElts, CostKind);
  }
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TargetCostKind CostKind,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
  }
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TargetCostKind CostKind,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, I);
  }
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TargetCostKind CostKind) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
  }
  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
  }
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  }
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
                                         CostKind);
  }
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TargetCostKind CostKind) override {
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TargetCostKind CostKind) override {
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
                                          AtomicElementSize);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
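/// Editor's illustrative sketch: this templated constructor is what a target's
/// TargetMachine typically reaches through getTargetTransformInfo(). The names
/// MyTargetMachine and MyTargetTTIImpl are hypothetical.
/// \code
///   TargetTransformInfo
///   MyTargetMachine::getTargetTransformInfo(const Function &F) const {
///     return TargetTransformInfo(MyTargetTTIImpl(this, F));
///   }
/// \endcode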
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  std::optional<TargetTransformInfo> TTI;

  virtual void anchor();
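/// Editor's illustrative sketch: obtaining a TargetTransformInfo through the
/// new pass manager. `FAM` is an assumed FunctionAnalysisManager on which
/// TargetIRAnalysis has been registered, `TM` a TargetMachine, `F` a Function.
/// \code
///   FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   unsigned CacheLineSize = TTI.getCacheLineSize();
/// \endcode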