#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;

Type *RetTy = nullptr;
bool TypeBasedOnly = false);
class TargetTransformInfo;

static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");

const PointersChainInfo &Info, Type *AccessTy,
std::pair<const Value *, unsigned>
KnownBits &Known, bool &KnownBitsComputed) const;
SimplifyAndSetOp) const;
bool HasBaseReg, int64_t Scale,
int64_t ScalableOffset = 0) const;
Align Alignment, unsigned AddrSpace) const;
unsigned AddrSpace = 0) const;
unsigned ScalarOpdIdx) const;
const APInt &DemandedElts, bool Insert, bool Extract,
ArrayRef<Type *> Tys,
bool IsZeroCmp) const;
unsigned *Fast = nullptr) const;
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
Type *ScalarValTy) const;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const;
unsigned Opcode, Type *Ty,
ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr,
const TargetLibraryInfo *TLibInfo = nullptr) const;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
const SmallBitVector &OpcodeMask,
ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const;
unsigned Index) const;
const Instruction *I = nullptr) const;
unsigned Index = -1, Value *Op0 = nullptr, Value *Op1 = nullptr) const;
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const;
unsigned Index = -1) const;
const APInt &DemandedDstElts,
const Instruction *I = nullptr) const;
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
return FMF && !(*FMF).allowReassoc();
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
const SCEV *Ptr = nullptr) const;
Type *ExpectedType) const;
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
unsigned DefaultCallPenalty) const;
unsigned AddrSpace) const;
unsigned AddrSpace) const;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const;
template <typename T> class Model;
std::unique_ptr<Concept> TTIImpl;
virtual std::pair<const Value *, unsigned>
Value *NewV) const = 0;
KnownBits &Known, bool &KnownBitsComputed) = 0;
SimplifyAndSetOp) = 0;
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,
int64_t ScalableOffset) = 0;
Align Alignment) = 0;
Align Alignment) = 0;
unsigned AddrSpace) = 0;
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) = 0;
unsigned ScalarOpdIdx) = 0;
unsigned *Fast) = 0;
Type *Ty = nullptr) const = 0;
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
Type *ScalarValTy) const = 0;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
unsigned NumStridedMemAccesses, unsigned NumPrefetches,
bool HasCall) const = 0;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
unsigned Index) = 0;
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) = 0;
unsigned Index) = 0;
const APInt &DemandedDstElts,
bool VariableMask, Align Alignment,
bool VariableMask, Align Alignment,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
std::optional<FastMathFlags> FMF,
Type *ExpectedType) = 0;
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
std::optional<uint32_t> AtomicCpySize) const = 0;
unsigned DefaultCallPenalty) const = 0;
unsigned AddrSpace) const = 0;
unsigned AddrSpace) const = 0;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const = 0;
template <typename T>
Model(T Impl) : Impl(std::move(Impl)) {}
~Model() override = default;

const DataLayout &getDataLayout() const override {
  return Impl.getDataLayout();
}
getGEPCost(Type *PointeeType, const Value *Ptr,
           ArrayRef<const Value *> Operands, Type *AccessType,
InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                     const PointersChainInfo &Info,
unsigned getInliningThresholdMultiplier() const override {
  return Impl.getInliningThresholdMultiplier();
}
unsigned adjustInliningThreshold(const CallBase *CB) override {
  return Impl.adjustInliningThreshold(CB);
}
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
  return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
}
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
  return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
}
int getInliningLastCallToStaticBonus() const override {
  return Impl.getInliningLastCallToStaticBonus();
}
int getInlinerVectorBonusPercent() const override {
  return Impl.getInlinerVectorBonusPercent();
}
unsigned getCallerAllocaCost(const CallBase *CB,
                             const AllocaInst *AI) const override {
  return Impl.getCallerAllocaCost(CB, AI);
}
InstructionCost getMemcpyCost(const Instruction *I) override {
  return Impl.getMemcpyCost(I);
}
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
  return Impl.getMaxMemIntrinsicInlineSizeThreshold();
}
InstructionCost getInstructionCost(const User *U,
BranchProbability getPredictableBranchThreshold() override {
  return Impl.getPredictableBranchThreshold();
}
InstructionCost getBranchMispredictPenalty() override {
  return Impl.getBranchMispredictPenalty();
}
bool hasBranchDivergence(const Function *F = nullptr) override {
  return Impl.hasBranchDivergence(F);
}
bool isSourceOfDivergence(const Value *V) override {
  return Impl.isSourceOfDivergence(V);
}
bool isAlwaysUniform(const Value *V) override {
  return Impl.isAlwaysUniform(V);
}
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
  return Impl.isValidAddrSpaceCast(FromAS, ToAS);
}
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
  return Impl.addrspacesMayAlias(AS0, AS1);
}
unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
  return Impl.collectFlatAddressOperands(OpIndexes, IID);
}
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
}
unsigned getAssumedAddrSpace(const Value *V) const override {
  return Impl.getAssumedAddrSpace(V);
}
bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override {
  return Impl.getPredicatedAddrSpace(V);
}
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                        Value *NewV) const override {
  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
bool isLoweredToCall(const Function *F) override {
  return Impl.isLoweredToCall(F);
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                             UnrollingPreferences &UP,
                             OptimizationRemarkEmitter *ORE) override {
  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                           PeelingPreferences &PP) override {
  return Impl.getPeelingPreferences(L, SE, PP);
}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                              AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                              HardwareLoopInfo &HWLoopInfo) override {
  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
unsigned getEpilogueVectorizationMinVF() override {
  return Impl.getEpilogueVectorizationMinVF();
}
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
  return Impl.preferPredicateOverEpilogue(TFI);
}
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
  return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
  return Impl.instCombineIntrinsic(IC, II);
}
std::optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                 APInt DemandedMask, KnownBits &Known,
                                 bool &KnownBitsComputed) override {
  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) override {
  return Impl.simplifyDemandedVectorEltsIntrinsic(
      IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
bool isLegalAddImmediate(int64_t Imm) override {
  return Impl.isLegalAddImmediate(Imm);
}
bool isLegalAddScalableImmediate(int64_t Imm) override {
  return Impl.isLegalAddScalableImmediate(Imm);
}
bool isLegalICmpImmediate(int64_t Imm) override {
  return Impl.isLegalICmpImmediate(Imm);
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                           Instruction *I, int64_t ScalableOffset) override {
  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                    AddrSpace, I, ScalableOffset);
}
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                   const TargetTransformInfo::LSRCost &C2) override {
  return Impl.isLSRCostLess(C1, C2);
}
bool isNumRegsMajorCostOfLSR() override {
  return Impl.isNumRegsMajorCostOfLSR();
}
bool shouldDropLSRSolutionIfLessProfitable() const override {
  return Impl.shouldDropLSRSolutionIfLessProfitable();
}
bool isProfitableLSRChainElement(Instruction *I) override {
  return Impl.isProfitableLSRChainElement(I);
}
bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                DominatorTree *DT, AssumptionCache *AC,
                TargetLibraryInfo *LibInfo) override {
  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
getPreferredAddressingMode(const Loop *L,
                           ScalarEvolution *SE) const override {
  return Impl.getPreferredAddressingMode(L, SE);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedStore(DataType, Alignment);
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedLoad(DataType, Alignment);
}
bool isLegalNTStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalNTStore(DataType, Alignment);
}
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalBroadcastLoad(Type *ElementTy,
                          ElementCount NumElements) const override {
  return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedScatter(DataType, Alignment);
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedGather(DataType, Alignment);
}
bool forceScalarizeMaskedGather(VectorType *DataType,
                                Align Alignment) override {
  return Impl.forceScalarizeMaskedGather(DataType, Alignment);
}
bool forceScalarizeMaskedScatter(VectorType *DataType,
                                 Align Alignment) override {
  return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
}
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedCompressStore(DataType, Alignment);
}
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
}
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
  return Impl.isLegalStridedLoadStore(DataType, Alignment);
}
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                  unsigned AddrSpace) override {
  return Impl.isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace);
}
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
  return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
}
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                     const SmallBitVector &OpcodeMask) const override {
  return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
}
bool enableOrderedReductions() override {
  return Impl.enableOrderedReductions();
}
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
  return Impl.hasDivRemOp(DataType, IsSigned);
}
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
  return Impl.hasVolatileVariant(I, AddrSpace);
}
bool prefersVectorizedAddressing() override {
  return Impl.prefersVectorizedAddressing();
}
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                     StackOffset BaseOffset, bool HasBaseReg,
                                     unsigned AddrSpace) override {
  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
  return Impl.isTruncateFree(Ty1, Ty2);
}
bool isProfitableToHoist(Instruction *I) override {
  return Impl.isProfitableToHoist(I);
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
unsigned getRegUsageForType(Type *Ty) override {
  return Impl.getRegUsageForType(Ty);
}
bool shouldBuildLookupTables() override {
  return Impl.shouldBuildLookupTables();
}
bool shouldBuildLookupTablesForConstant(Constant *C) override {
  return Impl.shouldBuildLookupTablesForConstant(C);
}
bool shouldBuildRelLookupTables() override {
  return Impl.shouldBuildRelLookupTables();
}
bool useColdCCForColdCall(Function &F) override {
  return Impl.useColdCCForColdCall(F);
}
bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override {
  return Impl.isTargetIntrinsicTriviallyScalarizable(ID);
}
unsigned ScalarOpdIdx) override {
  return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
}
int OpdIdx) override {
  return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
}
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                      int RetIdx) override {
  return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
}
InstructionCost getScalarizationOverhead(VectorType *Ty,
                                         const APInt &DemandedElts,
                                         bool Insert, bool Extract,
                                         ArrayRef<Value *> VL = {}) override {
  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 ArrayRef<Type *> Tys,
  return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
}
bool supportsEfficientVectorElementLoadStore() override {
  return Impl.supportsEfficientVectorElementLoadStore();
}
bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
bool supportsTailCallFor(const CallBase *CB) override {
  return Impl.supportsTailCallFor(CB);
}
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
  return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                             bool IsZeroCmp) const override {
  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableSelectOptimize() override {
  return Impl.enableSelectOptimize();
}
bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
  return Impl.shouldTreatInstructionLikeSelect(I);
}
bool enableInterleavedAccessVectorization() override {
  return Impl.enableInterleavedAccessVectorization();
}
bool enableMaskedInterleavedAccessVectorization() override {
  return Impl.enableMaskedInterleavedAccessVectorization();
}
bool isFPVectorizationPotentiallyUnsafe() override {
  return Impl.isFPVectorizationPotentiallyUnsafe();
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                    unsigned *Fast) override {
  return Impl.getPopcntSupport(IntTyWidthInBit);
}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
  return Impl.isExpensiveToSpeculativelyExecute(I);
}
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
}
InstructionCost getFPOpCost(Type *Ty) override {
  return Impl.getFPOpCost(Ty);
}
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                      const APInt &Imm, Type *Ty) override {
  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
}
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
  return Impl.getIntImmCost(Imm, Ty, CostKind);
}
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  Instruction *Inst = nullptr) override {
  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
const APInt &Imm, Type *Ty,
  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
}
bool preferToKeepConstantsAttached(const Instruction &Inst,
                                   const Function &Fn) const override {
  return Impl.preferToKeepConstantsAttached(Inst, Fn);
}
unsigned getNumberOfRegisters(unsigned ClassID) const override {
  return Impl.getNumberOfRegisters(ClassID);
}
bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
  return Impl.hasConditionalLoadStoreForType(Ty);
}
unsigned getRegisterClassForType(bool Vector,
                                 Type *Ty = nullptr) const override {
  return Impl.getRegisterClassForType(Vector, Ty);
}
const char *getRegisterClassName(unsigned ClassID) const override {
  return Impl.getRegisterClassName(ClassID);
}
TypeSize getRegisterBitWidth(RegisterKind K) const override {
  return Impl.getRegisterBitWidth(K);
}
unsigned getMinVectorRegisterBitWidth() const override {
  return Impl.getMinVectorRegisterBitWidth();
}
std::optional<unsigned> getMaxVScale() const override {
  return Impl.getMaxVScale();
}
std::optional<unsigned> getVScaleForTuning() const override {
  return Impl.getVScaleForTuning();
}
bool isVScaleKnownToBeAPowerOfTwo() const override {
  return Impl.isVScaleKnownToBeAPowerOfTwo();
}
bool shouldMaximizeVectorBandwidth(
  return Impl.shouldMaximizeVectorBandwidth(K);
}
ElementCount getMinimumVF(unsigned ElemWidth,
                          bool IsScalable) const override {
  return Impl.getMinimumVF(ElemWidth, IsScalable);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
  return Impl.getMaximumVF(ElemWidth, Opcode);
}
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                           Type *ScalarValTy) const override {
  return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
}
bool shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
  return Impl.shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}
unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
  return Impl.getCacheSize(Level);
}
std::optional<unsigned>
getCacheAssociativity(CacheLevel Level) const override {
  return Impl.getCacheAssociativity(Level);
}
std::optional<unsigned> getMinPageSize() const override {
  return Impl.getMinPageSize();
}
unsigned getPrefetchDistance() const override {
  return Impl.getPrefetchDistance();
}
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                              unsigned NumStridedMemAccesses,
                              unsigned NumPrefetches,
                              bool HasCall) const override {
  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                   NumPrefetches, HasCall);
}
unsigned getMaxPrefetchIterationsAhead() const override {
  return Impl.getMaxPrefetchIterationsAhead();
}
bool enableWritePrefetching() const override {
  return Impl.enableWritePrefetching();
}
bool shouldPrefetchAddressSpace(unsigned AS) const override {
  return Impl.shouldPrefetchAddressSpace(AS);
}
unsigned getMaxInterleaveFactor(ElementCount VF) override {
  return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                          ProfileSummaryInfo *PSI,
                                          BlockFrequencyInfo *BFI) override {
  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
InstructionCost getArithmeticInstrCost(
    OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI = nullptr) override {
  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                const SmallBitVector &OpcodeMask,
  return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
}
ArrayRef<const Value *> Args,
const Instruction *CxtI) override {
  return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) override {
  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                         unsigned Index) override {
  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
const Instruction *I = nullptr) override {
  return Impl.getCFInstrCost(Opcode, CostKind, I);
}
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   OperandValueInfo Op1Info,
                                   OperandValueInfo Op2Info,
                                   const Instruction *I) override {
  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                 Op1Info, Op2Info, I);
}
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                   unsigned Index, Value *Op0,
                                   Value *Op1) override {
  return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
}
InstructionCost getVectorInstrCost(
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) override {
  return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar,
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                   unsigned Index) override {
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                          const APInt &DemandedDstElts,
  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                OperandValueInfo OpInfo,
                                const Instruction *I) override {
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  const Instruction *I) override {
  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       const Instruction *I = nullptr) override {
  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       const Instruction *I = nullptr) override {
  return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
InstructionCost getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    bool UseMaskForCond, bool UseMaskForGaps) override {
  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                         UseMaskForCond, UseMaskForGaps);
}
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                           std::optional<FastMathFlags> FMF,
  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
}
  return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
}
getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
  return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
  return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
}
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  return Impl.getIntrinsicInstrCost(ICA, CostKind);
}
InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                 ArrayRef<Type *> Tys,
unsigned getNumberOfParts(Type *Tp) override {
  return Impl.getNumberOfParts(Tp);
}
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) override {
  return Impl.getAddressComputationCost(Ty, SE, Ptr);
}
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
  return Impl.getCostOfKeepingLiveOverCall(Tys);
}
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                        MemIntrinsicInfo &Info) override {
  return Impl.getTgtMemIntrinsic(Inst, Info);
}
unsigned getAtomicMemIntrinsicMaxElementSize() const override {
  return Impl.getAtomicMemIntrinsicMaxElementSize();
}
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                         Type *ExpectedType) override {
  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicElementSize) const override {
  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                        DestAddrSpace, SrcAlign, DestAlign,
void getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicCpySize) const override {
  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                         SrcAddrSpace, DestAddrSpace,
                                         SrcAlign, DestAlign, AtomicCpySize);
}
const Function *Callee) const override {
  return Impl.areInlineCompatible(Caller, Callee);
}
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                              unsigned DefaultCallPenalty) const override {
  return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
}
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                           const ArrayRef<Type *> &Types) const override {
  return Impl.areTypesABICompatible(Caller, Callee, Types);
}
  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
}
  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
}
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
}
bool isLegalToVectorizeLoad(LoadInst *LI) const override {
  return Impl.isLegalToVectorizeLoad(LI);
}
bool isLegalToVectorizeStore(StoreInst *SI) const override {
  return Impl.isLegalToVectorizeStore(SI);
}
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                 unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                 ElementCount VF) const override {
  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
}
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
  return Impl.isElementTypeLegalForScalableVector(Ty);
}
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                             unsigned ChainSizeInBytes,
  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                              unsigned ChainSizeInBytes,
  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
bool preferFixedOverScalableIfEqualCost() const override {
  return Impl.preferFixedOverScalableIfEqualCost();
}
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                           ReductionFlags Flags) const override {
  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
}
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                     ReductionFlags Flags) const override {
  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
}
bool preferEpilogueVectorization() const override {
  return Impl.preferEpilogueVectorization();
}
bool shouldExpandReduction(const IntrinsicInst *II) const override {
  return Impl.shouldExpandReduction(II);
}
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
  return Impl.getPreferredExpandedReductionShuffle(II);
}
unsigned getGISelRematGlobalCost() const override {
  return Impl.getGISelRematGlobalCost();
}
unsigned getMinTripCountTailFoldingThreshold() const override {
  return Impl.getMinTripCountTailFoldingThreshold();
}
bool supportsScalableVectors() const override {
  return Impl.supportsScalableVectors();
}
bool enableScalableVectorization() const override {
  return Impl.enableScalableVectorization();
}
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                           Align Alignment) const override {
  return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
}
bool isProfitableToSinkOperands(Instruction *I,
                                SmallVectorImpl<Use *> &Ops) const override {
  return Impl.isProfitableToSinkOperands(I, Ops);
}
bool isVectorShiftByScalarCheap(Type *Ty) const override {
  return Impl.isVectorShiftByScalarCheap(Ty);
}
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
  return Impl.getVPLegalizationStrategy(PI);
}
bool hasArmWideBranch(bool Thumb) const override {
  return Impl.hasArmWideBranch(Thumb);
}
unsigned getMaxNumArgs() const override {
  return Impl.getMaxNumArgs();
}
unsigned getNumBytesToPadGlobalArray(unsigned Size,

template <typename T>
    : TTIImpl(new Model<T>(Impl)) {}
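
The TTIImpl member and the templated constructor above implement a classic type-erasure (Concept/Model) pattern: any object that exposes the TTI hooks can be wrapped without sharing a base class, and Model<T> forwards every virtual hook in Concept to the wrapped Impl. The following is a minimal, self-contained sketch of that pattern; AnyTTI and MyTargetTTIImpl are illustrative stand-ins, not part of the LLVM API.

#include <memory>
#include <utility>

// Stand-in interface: the real Concept declares every TTI hook as a pure
// virtual function; Model<T> forwards each one to the wrapped Impl.
class AnyTTI {
  struct Concept {
    virtual ~Concept() = default;
    virtual unsigned getCacheLineSize() const = 0;
  };
  template <typename T> struct Model : Concept {
    T Impl;
    explicit Model(T Impl) : Impl(std::move(Impl)) {}
    unsigned getCacheLineSize() const override {
      return Impl.getCacheLineSize(); // forward to the target's implementation
    }
  };
  std::unique_ptr<Concept> TTIImpl;

public:
  // Mirrors TargetTransformInfo(T Impl) : TTIImpl(new Model<T>(Impl)) {}.
  template <typename T>
  explicit AnyTTI(T Impl) : TTIImpl(new Model<T>(std::move(Impl))) {}
  unsigned getCacheLineSize() const { return TTIImpl->getCacheLineSize(); }
};

// Hypothetical concrete implementation a target might provide.
struct MyTargetTTIImpl {
  unsigned getCacheLineSize() const { return 64; }
};

// Usage: AnyTTI TTI(MyTargetTTIImpl{}); TTI.getCacheLineSize() returns 64.

This is why the Model class body above is a long list of one-line forwarding overrides: each override exists only to route a query to the target-specific implementation.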
    : TTICallback(Arg.TTICallback) {}
    : TTICallback(std::move(Arg.TTICallback)) {}
  TTICallback = RHS.TTICallback;
  TTICallback = std::move(RHS.TTICallback);
std::optional<TargetTransformInfo> TTI;
virtual void anchor();
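
For context, here is a minimal sketch of how a new-pass-manager function pass typically obtains a TargetTransformInfo through TargetIRAnalysis and queries an instruction cost. The pass name CostDumpPass is hypothetical, and exact headers and overloads may vary between LLVM versions.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

struct CostDumpPass : PassInfoMixin<CostDumpPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    // TargetIRAnalysis::run produces a TargetTransformInfo for this function.
    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
    for (Instruction &I : instructions(F)) {
      // Ask the target for the reciprocal-throughput cost of each instruction.
      InstructionCost Cost =
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
      errs() << I << " -> " << Cost << "\n";
    }
    return PreservedAnalyses::all();
  }
};

When no target-specific TTICallback is registered, a default-constructed TargetIRAnalysis still yields a usable TargetTransformInfo based only on the module's DataLayout, so the same query code works with or without a TargetMachine.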