#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
Type *RetTy = nullptr;
bool TypeBasedOnly = false);
class TargetTransformInfo;
static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
const PointersChainInfo &Info,
std::pair<const Value *, unsigned>
KnownBits &Known, bool &KnownBitsComputed) const;
SimplifyAndSetOp) const;
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0,
int64_t BaseOffset, bool HasBaseReg,
unsigned AddrSpace = 0) const;
const APInt &DemandedElts,
bool Insert, bool Extract,
bool IsZeroCmp) const;
unsigned *Fast = nullptr) const;
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
Type *ScalarValTy) const;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
unsigned NumStridedMemAccesses,
unsigned NumPrefetches, bool HasCall) const;
unsigned Opcode, Type *Ty,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr) const;
ArrayRef<int> Mask = std::nullopt,
ArrayRef<const Value *> Args = std::nullopt) const;
unsigned Index) const;
Value *Op1 = nullptr) const;
unsigned Index = -1) const;
const APInt &DemandedDstElts,
const Instruction *I = nullptr) const;
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
return FMF && !(*FMF).allowReassoc();
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
const SCEV *Ptr = nullptr) const;
Type *ExpectedType) const;
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
unsigned AddrSpace) const;
unsigned AddrSpace) const;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const;
template <typename T> class Model;

std::unique_ptr<Concept> TTIImpl;
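// The members above implement a type-erasure idiom: the public
// TargetTransformInfo object owns an abstract Concept through TTIImpl, and
// Model<T> (defined further below) adapts an arbitrary target implementation
// T to that interface by forwarding every virtual call to it. The following
// is a minimal, self-contained sketch of the same idiom with hypothetical
// names (CostConcept, CostModel, and AnyCostQuery are illustrative only and
// not part of the LLVM API):
//
//   struct CostConcept {
//     virtual ~CostConcept() = default;
//     virtual unsigned getFlatAddressSpace() = 0;
//   };
//
//   template <typename T> struct CostModel final : CostConcept {
//     T Impl;
//     explicit CostModel(T Impl) : Impl(std::move(Impl)) {}
//     unsigned getFlatAddressSpace() override {
//       return Impl.getFlatAddressSpace();
//     }
//   };
//
//   class AnyCostQuery {
//     std::unique_ptr<CostConcept> Impl;
//
//   public:
//     template <typename T>
//     AnyCostQuery(T TargetImpl)
//         : Impl(new CostModel<T>(std::move(TargetImpl))) {}
//     unsigned getFlatAddressSpace() const {
//       return Impl->getFlatAddressSpace();
//     }
//   };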
virtual std::pair<const Value *, unsigned>
Value *NewV) const = 0;
KnownBits &Known, bool &KnownBitsComputed) = 0;
SimplifyAndSetOp) = 0;
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,
Align Alignment) = 0;
Align Alignment) = 0;
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) = 0;
const APInt &DemandedElts,
bool Insert, bool Extract,
unsigned *Fast) = 0;
Type *Ty = nullptr) const = 0;
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
Type *ScalarValTy) const = 0;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
unsigned NumStridedMemAccesses,
unsigned NumPrefetches, bool HasCall) const = 0;
unsigned Index) = 0;
unsigned Index) = 0;
const APInt &DemandedDstElts,
bool VariableMask, Align Alignment,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
std::optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
Type *ExpectedType) = 0;
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicCpySize) const = 0;
unsigned AddrSpace) const = 0;
unsigned AddrSpace) const = 0;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const = 0;
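// Note: targets do not implement the abstract Concept above directly. Each
// target supplies a concrete implementation class (conventionally
// <Target>TTIImpl, usually derived from BasicTTIImplBase<T> in
// llvm/CodeGen/BasicTTIImpl.h), and the Model<T> adaptor below forwards each
// Concept method to it. A minimal, hypothetical override sketch (the class
// name and the constant 64 are illustrative only):
//
//   class MyTargetTTIImpl : public llvm::BasicTTIImplBase<MyTargetTTIImpl> {
//     // ... target-specific state and required constructor boilerplate ...
//   public:
//     unsigned getCacheLineSize() const { return 64; }
//   };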
template <typename T>
Model(T Impl) : Impl(std::move(Impl)) {}
~Model() override = default;

const DataLayout &getDataLayout() const override {
  return Impl.getDataLayout();
}

getGEPCost(Type *PointeeType, const Value *Ptr,

InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                     const PointersChainInfo &Info,

unsigned getInliningThresholdMultiplier() override {
  return Impl.getInliningThresholdMultiplier();
}
unsigned adjustInliningThreshold(const CallBase *CB) override {
  return Impl.adjustInliningThreshold(CB);
}
int getInlinerVectorBonusPercent() override {
  return Impl.getInlinerVectorBonusPercent();
}
InstructionCost getMemcpyCost(const Instruction *I) override {
  return Impl.getMemcpyCost(I);
}

InstructionCost getInstructionCost(const User *U,

BranchProbability getPredictableBranchThreshold() override {
  return Impl.getPredictableBranchThreshold();
}
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool useGPUDivergenceAnalysis() override {
  return Impl.useGPUDivergenceAnalysis();
}
bool isSourceOfDivergence(const Value *V) override {
  return Impl.isSourceOfDivergence(V);
}
bool isAlwaysUniform(const Value *V) override {
  return Impl.isAlwaysUniform(V);
}
unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }

bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
  return Impl.collectFlatAddressOperands(OpIndexes, IID);

bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
}
unsigned getAssumedAddrSpace(const Value *V) const override {
  return Impl.getAssumedAddrSpace(V);
}
bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override {
  return Impl.getPredicatedAddrSpace(V);
}
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                        Value *NewV) const override {
  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
bool isLoweredToCall(const Function *F) override {
  return Impl.isLoweredToCall(F);
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                             UnrollingPreferences &UP,
                             OptimizationRemarkEmitter *ORE) override {
  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                           PeelingPreferences &PP) override {
  return Impl.getPeelingPreferences(L, SE, PP);
}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                              AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                              HardwareLoopInfo &HWLoopInfo) override {
  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                 AssumptionCache &AC, TargetLibraryInfo *TLI,
                                 LoopVectorizationLegality *LVL,
                                 InterleavedAccessInfo *IAI) override {
  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
}
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
  return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
  return Impl.instCombineIntrinsic(IC, II);
}
std::optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                 APInt DemandedMask, KnownBits &Known,
                                 bool &KnownBitsComputed) override {
  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,

std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) override {
  return Impl.simplifyDemandedVectorEltsIntrinsic(
      IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
bool isLegalAddImmediate(int64_t Imm) override {
  return Impl.isLegalAddImmediate(Imm);
}
bool isLegalICmpImmediate(int64_t Imm) override {
  return Impl.isLegalICmpImmediate(Imm);
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                           Instruction *I) override {
  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,

bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                   const TargetTransformInfo::LSRCost &C2) override {
  return Impl.isLSRCostLess(C1, C2);
}
bool isNumRegsMajorCostOfLSR() override {
  return Impl.isNumRegsMajorCostOfLSR();
}
bool isProfitableLSRChainElement(Instruction *I) override {
  return Impl.isProfitableLSRChainElement(I);
}
bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                DominatorTree *DT, AssumptionCache *AC,
                TargetLibraryInfo *LibInfo) override {
  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override {
  return Impl.getPreferredAddressingMode(L, SE);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedStore(DataType, Alignment);
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedLoad(DataType, Alignment);
}
bool isLegalNTStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalNTStore(DataType, Alignment);
}
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalBroadcastLoad(Type *ElementTy,
                          ElementCount NumElements) const override {
  return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedScatter(DataType, Alignment);
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedGather(DataType, Alignment);
}
bool forceScalarizeMaskedGather(VectorType *DataType,
                                Align Alignment) override {
  return Impl.forceScalarizeMaskedGather(DataType, Alignment);
}
bool forceScalarizeMaskedScatter(VectorType *DataType,
                                 Align Alignment) override {
  return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
}
bool isLegalMaskedCompressStore(Type *DataType) override {
  return Impl.isLegalMaskedCompressStore(DataType);
}
bool isLegalMaskedExpandLoad(Type *DataType) override {
  return Impl.isLegalMaskedExpandLoad(DataType);
}
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                     const SmallBitVector &OpcodeMask) const override {
  return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
}
bool enableOrderedReductions() override {
  return Impl.enableOrderedReductions();
}
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
  return Impl.hasDivRemOp(DataType, IsSigned);
}
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
  return Impl.hasVolatileVariant(I, AddrSpace);
}
bool prefersVectorizedAddressing() override {
  return Impl.prefersVectorizedAddressing();
}
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     unsigned AddrSpace) override {
  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,

bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
  return Impl.isTruncateFree(Ty1, Ty2);
}
bool isProfitableToHoist(Instruction *I) override {
  return Impl.isProfitableToHoist(I);
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
unsigned getRegUsageForType(Type *Ty) override {
  return Impl.getRegUsageForType(Ty);
}
bool shouldBuildLookupTables() override {
  return Impl.shouldBuildLookupTables();
}
bool shouldBuildLookupTablesForConstant(Constant *C) override {
  return Impl.shouldBuildLookupTablesForConstant(C);
}
bool shouldBuildRelLookupTables() override {
  return Impl.shouldBuildRelLookupTables();
}
bool useColdCCForColdCall(Function &F) override {
  return Impl.useColdCCForColdCall(F);
}
InstructionCost getScalarizationOverhead(VectorType *Ty,
                                         const APInt &DemandedElts,
                                         bool Insert, bool Extract,
  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,

getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 ArrayRef<Type *> Tys,
  return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);

bool supportsEfficientVectorElementLoadStore() override {
  return Impl.supportsEfficientVectorElementLoadStore();
}
bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
bool supportsTailCallFor(const CallBase *CB) override {
  return Impl.supportsTailCallFor(CB);
}
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
  return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                             bool IsZeroCmp) const override {
  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableInterleavedAccessVectorization() override {
  return Impl.enableInterleavedAccessVectorization();
}
bool enableSelectOptimize() override {
  return Impl.enableSelectOptimize();
}
bool enableMaskedInterleavedAccessVectorization() override {
  return Impl.enableMaskedInterleavedAccessVectorization();
}
bool isFPVectorizationPotentiallyUnsafe() override {
  return Impl.isFPVectorizationPotentiallyUnsafe();
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                    unsigned *Fast) override {

  return Impl.getPopcntSupport(IntTyWidthInBit);

bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
  return Impl.isExpensiveToSpeculativelyExecute(I);
}
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
}
InstructionCost getFPOpCost(Type *Ty) override {
  return Impl.getFPOpCost(Ty);
}
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                      const APInt &Imm, Type *Ty) override {
  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
}
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
  return Impl.getIntImmCost(Imm, Ty, CostKind);

InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  Instruction *Inst = nullptr) override {
  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
const APInt &Imm, Type *Ty,
  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
unsigned getNumberOfRegisters(unsigned ClassID) const override {
  return Impl.getNumberOfRegisters(ClassID);
}
unsigned getRegisterClassForType(bool Vector,
                                 Type *Ty = nullptr) const override {
  return Impl.getRegisterClassForType(Vector, Ty);
}
const char *getRegisterClassName(unsigned ClassID) const override {
  return Impl.getRegisterClassName(ClassID);
}
TypeSize getRegisterBitWidth(RegisterKind K) const override {
  return Impl.getRegisterBitWidth(K);
}
unsigned getMinVectorRegisterBitWidth() const override {
  return Impl.getMinVectorRegisterBitWidth();
}
std::optional<unsigned> getMaxVScale() const override {
  return Impl.getMaxVScale();
}
std::optional<unsigned> getVScaleForTuning() const override {
  return Impl.getVScaleForTuning();
}
bool shouldMaximizeVectorBandwidth(
  return Impl.shouldMaximizeVectorBandwidth(K);

ElementCount getMinimumVF(unsigned ElemWidth,
                          bool IsScalable) const override {
  return Impl.getMinimumVF(ElemWidth, IsScalable);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
  return Impl.getMaximumVF(ElemWidth, Opcode);
}
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                           Type *ScalarValTy) const override {
  return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
}
bool shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
  return Impl.shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}
unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
  return Impl.getCacheSize(Level);
}
std::optional<unsigned>
getCacheAssociativity(CacheLevel Level) const override {
  return Impl.getCacheAssociativity(Level);
}
unsigned getPrefetchDistance() const override {
  return Impl.getPrefetchDistance();
}
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                              unsigned NumStridedMemAccesses,
                              unsigned NumPrefetches,
                              bool HasCall) const override {
  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                   NumPrefetches, HasCall);
}
unsigned getMaxPrefetchIterationsAhead() const override {
  return Impl.getMaxPrefetchIterationsAhead();
}
bool enableWritePrefetching() const override {
  return Impl.enableWritePrefetching();
}
bool shouldPrefetchAddressSpace(unsigned AS) const override {
  return Impl.shouldPrefetchAddressSpace(AS);
}
unsigned getMaxInterleaveFactor(ElementCount VF) override {
  return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                          ProfileSummaryInfo *PSI,
                                          BlockFrequencyInfo *BFI) override {
  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
InstructionCost getArithmeticInstrCost(
    OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI = nullptr) override {
  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,

ArrayRef<const Value *> Args) override {
  return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) override {
  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                         unsigned Index) override {
  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
const Instruction *I = nullptr) override {
  return Impl.getCFInstrCost(Opcode, CostKind, I);
}
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   const Instruction *I) override {
  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                   unsigned Index, Value *Op0,
                                   Value *Op1) override {
  return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
}
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                   unsigned Index) override {

getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                          const APInt &DemandedDstElts,
  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                OperandValueInfo OpInfo,
                                const Instruction *I) override {

InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  const Instruction *I) override {
  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,

InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,

getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       const Instruction *I = nullptr) override {
  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,

InstructionCost getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    bool UseMaskForCond, bool UseMaskForGaps) override {
  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                         UseMaskForCond, UseMaskForGaps);
}
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                           std::optional<FastMathFlags> FMF,
  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);

InstructionCost getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
    std::optional<FastMathFlags> FMF,
  return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,

InstructionCost getMulAccReductionCost(
    bool IsUnsigned, Type *ResTy, VectorType *Ty,
  return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  return Impl.getIntrinsicInstrCost(ICA, CostKind);

InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                 ArrayRef<Type *> Tys,

unsigned getNumberOfParts(Type *Tp) override {
  return Impl.getNumberOfParts(Tp);
}
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) override {
  return Impl.getAddressComputationCost(Ty, SE, Ptr);
}
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
  return Impl.getCostOfKeepingLiveOverCall(Tys);
}
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                        MemIntrinsicInfo &Info) override {
  return Impl.getTgtMemIntrinsic(Inst, Info);
}
unsigned getAtomicMemIntrinsicMaxElementSize() const override {
  return Impl.getAtomicMemIntrinsicMaxElementSize();
}
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                         Type *ExpectedType) override {
  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
    std::optional<uint32_t> AtomicElementSize) const override {
  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                        DestAddrSpace, SrcAlign, DestAlign,

void getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    unsigned SrcAlign, unsigned DestAlign,
    std::optional<uint32_t> AtomicCpySize) const override {
  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                         SrcAddrSpace, DestAddrSpace,
                                         SrcAlign, DestAlign, AtomicCpySize);
}
const Function *Callee) const override {
  return Impl.areInlineCompatible(Caller, Callee);
}
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                           const ArrayRef<Type *> &Types) const override {
  return Impl.areTypesABICompatible(Caller, Callee, Types);
}
  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());

  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());

unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
}
bool isLegalToVectorizeLoad(LoadInst *LI) const override {
  return Impl.isLegalToVectorizeLoad(LI);
}
bool isLegalToVectorizeStore(StoreInst *SI) const override {
  return Impl.isLegalToVectorizeStore(SI);
}
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                 unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,

bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,

bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                 ElementCount VF) const override {
  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
}
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
  return Impl.isElementTypeLegalForScalableVector(Ty);
}
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                             unsigned ChainSizeInBytes,
  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);

unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                              unsigned ChainSizeInBytes,
  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                           ReductionFlags Flags) const override {
  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
}
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                     ReductionFlags Flags) const override {
  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
}
bool preferEpilogueVectorization() const override {
  return Impl.preferEpilogueVectorization();
}
bool shouldExpandReduction(const IntrinsicInst *II) const override {
  return Impl.shouldExpandReduction(II);
}
unsigned getGISelRematGlobalCost() const override {
  return Impl.getGISelRematGlobalCost();
}
unsigned getMinTripCountTailFoldingThreshold() const override {
  return Impl.getMinTripCountTailFoldingThreshold();
}
bool supportsScalableVectors() const override {
  return Impl.supportsScalableVectors();
}
bool enableScalableVectorization() const override {
  return Impl.enableScalableVectorization();
}
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                           Align Alignment) const override {
  return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
}
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
  return Impl.getVPLegalizationStrategy(PI);
}
bool hasArmWideBranch(bool Thumb) const override {
  return Impl.hasArmWideBranch(Thumb);
}
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
TargetIRAnalysis(const TargetIRAnalysis &Arg) : TTICallback(Arg.TTICallback) {}
TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
  TTICallback = RHS.TTICallback;
  return *this;
}
TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
  TTICallback = std::move(RHS.TTICallback);
  return *this;
}

std::optional<TargetTransformInfo> TTI;

virtual void anchor();
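// Usage sketch (an assumption-laden illustration, not code from this header):
// a hypothetical new-pass-manager function pass obtains the per-function
// TargetTransformInfo through TargetIRAnalysis and queries a reciprocal
// throughput cost for each instruction. The pass name CostPrinterPass is
// invented for the example; the analysis and cost APIs used are the ones
// declared above.
//
//   #include "llvm/Analysis/TargetTransformInfo.h"
//   #include "llvm/IR/InstIterator.h"
//   #include "llvm/IR/PassManager.h"
//   #include "llvm/Support/raw_ostream.h"
//
//   namespace {
//   struct CostPrinterPass : llvm::PassInfoMixin<CostPrinterPass> {
//     llvm::PreservedAnalyses run(llvm::Function &F,
//                                 llvm::FunctionAnalysisManager &FAM) {
//       // TargetIRAnalysis produces the TargetTransformInfo for F.
//       const llvm::TargetTransformInfo &TTI =
//           FAM.getResult<llvm::TargetIRAnalysis>(F);
//       for (llvm::Instruction &I : llvm::instructions(F)) {
//         llvm::InstructionCost Cost = TTI.getInstructionCost(
//             &I, llvm::TargetTransformInfo::TCK_RecipThroughput);
//         if (Cost.isValid())
//           llvm::errs() << I << " -> " << Cost << "\n";
//       }
//       return llvm::PreservedAnalyses::all();
//     }
//   };
//   } // namespace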