#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;

Type *RetTy = nullptr;
bool TypeBasedOnly = false);
class TargetTransformInfo;
static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
const PointersChainInfo &Info, Type *AccessTy,
std::pair<const Value *, unsigned>
KnownBits &Known, bool &KnownBitsComputed) const;
SimplifyAndSetOp) const;
bool HasBaseReg, int64_t Scale,
int64_t ScalableOffset = 0) const;
Align Alignment, unsigned AddrSpace) const;
unsigned AddrSpace = 0) const;
unsigned ScalarOpdIdx) const;
const APInt &DemandedElts, bool Insert, bool Extract,
ArrayRef<Type *> Tys,
bool IsZeroCmp) const;
unsigned *Fast = nullptr) const;
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
Type *ScalarValTy) const;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const;
std::optional<unsigned> BinOp = std::nullopt) const;
unsigned Opcode, Type *Ty,
ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr,
const TargetLibraryInfo *TLibInfo = nullptr) const;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
const SmallBitVector &OpcodeMask,
ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const;
unsigned Index) const;
const Instruction *I = nullptr) const;
unsigned Index = -1, Value *Op0 = nullptr,
Value *Op1 = nullptr) const;
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const;
unsigned Index = -1) const;
const APInt &DemandedDstElts,
const Instruction *I = nullptr) const;
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
return FMF && !(*FMF).allowReassoc();
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
const SCEV *Ptr = nullptr) const;
Type *ExpectedType) const;
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
unsigned DefaultCallPenalty) const;
unsigned AddrSpace) const;
unsigned AddrSpace) const;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const;
template <typename T> class Model;
std::unique_ptr<Concept> TTIImpl;
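The Concept/Model pair above is a type-erasure idiom: TargetTransformInfo owns a std::unique_ptr<Concept>, and the Model<T> template forwards every virtual call to a concrete target implementation T. A minimal, self-contained sketch of the same idiom follows; the names AnalysisWrapper and doQuery are illustrative, not part of the real TTI interface.

#include <memory>
#include <utility>

// Sketch of the type-erasure pattern used by TargetTransformInfo.
class AnalysisWrapper {
  struct Concept {                      // abstract interface
    virtual ~Concept() = default;
    virtual unsigned doQuery(unsigned X) const = 0;
  };
  template <typename T> struct Model final : Concept {  // forwards to a concrete impl
    T Impl;
    explicit Model(T Impl) : Impl(std::move(Impl)) {}
    unsigned doQuery(unsigned X) const override { return Impl.doQuery(X); }
  };
  std::unique_ptr<Concept> Impl;        // analogous to TTIImpl above

public:
  template <typename T>
  explicit AnalysisWrapper(T Impl) : Impl(new Model<T>(std::move(Impl))) {}
  unsigned doQuery(unsigned X) const { return Impl->doQuery(X); }
};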
virtual std::pair<const Value *, unsigned>
Value *NewV) const = 0;
KnownBits &Known, bool &KnownBitsComputed) = 0;
SimplifyAndSetOp) = 0;
int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
int64_t ScalableOffset) = 0;
Align Alignment) = 0;
Align Alignment) = 0;
unsigned AddrSpace) = 0;
bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0;
unsigned ScalarOpdIdx) = 0;
unsigned *Fast) = 0;
Type *Ty = nullptr) const = 0;
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
Type *ScalarValTy) const = 0;
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const = 0;
std::optional<unsigned> BinOp) const = 0;
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
unsigned Index) = 0;
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) = 0;
unsigned Index) = 0;
const APInt &DemandedDstElts,
bool VariableMask, Align Alignment,
bool VariableMask, Align Alignment,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
std::optional<FastMathFlags> FMF,
Type *ExpectedType) = 0;
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
std::optional<uint32_t> AtomicCpySize) const = 0;
unsigned DefaultCallPenalty) const = 0;
unsigned AddrSpace) const = 0;
unsigned AddrSpace) const = 0;
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const = 0;
template <typename T>
Model(T Impl) : Impl(std::move(Impl)) {}
~Model() override = default;
const DataLayout &getDataLayout() const override { return Impl.getDataLayout(); }
getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands, Type *AccessType,
InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
const PointersChainInfo &Info,
unsigned getInliningThresholdMultiplier() const override { return Impl.getInliningThresholdMultiplier(); }
unsigned adjustInliningThreshold(const CallBase *CB) override { return Impl.adjustInliningThreshold(CB); }
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override { return Impl.getInliningCostBenefitAnalysisSavingsMultiplier(); }
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override { return Impl.getInliningCostBenefitAnalysisProfitableMultiplier(); }
int getInliningLastCallToStaticBonus() const override { return Impl.getInliningLastCallToStaticBonus(); }
int getInlinerVectorBonusPercent() const override { return Impl.getInlinerVectorBonusPercent(); }
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override { return Impl.getCallerAllocaCost(CB, AI); }
InstructionCost getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); }
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override { return Impl.getMaxMemIntrinsicInlineSizeThreshold(); }
InstructionCost getInstructionCost(const User *U,
BranchProbability getPredictableBranchThreshold() override { return Impl.getPredictableBranchThreshold(); }
InstructionCost getBranchMispredictPenalty() override { return Impl.getBranchMispredictPenalty(); }
bool hasBranchDivergence(const Function *F = nullptr) override { return Impl.hasBranchDivergence(F); }
bool isSourceOfDivergence(const Value *V) override { return Impl.isSourceOfDivergence(V); }
bool isAlwaysUniform(const Value *V) override { return Impl.isAlwaysUniform(V); }
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { return Impl.isValidAddrSpaceCast(FromAS, ToAS); }
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override { return Impl.addrspacesMayAlias(AS0, AS1); }
unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
return Impl.collectFlatAddressOperands(OpIndexes, IID);
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { return Impl.isNoopAddrSpaceCast(FromAS, ToAS); }
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); }
unsigned getAssumedAddrSpace(const Value *V) const override { return Impl.getAssumedAddrSpace(V); }
bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
std::pair<const Value *, unsigned> getPredicatedAddrSpace(const Value *V) const override { return Impl.getPredicatedAddrSpace(V); }
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override { return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); }
bool isLoweredToCall(const Function *F) override { return Impl.isLoweredToCall(F); }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) override { return Impl.getUnrollingPreferences(L, SE, UP, ORE); }
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) override { return Impl.getPeelingPreferences(L, SE, PP); }
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) override { return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); }
unsigned getEpilogueVectorizationMinVF() override { return Impl.getEpilogueVectorizationMinVF(); }
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override { return Impl.preferPredicateOverEpilogue(TFI); }
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override { return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow); }
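A consumer such as a loop vectorizer can branch on the tail-folding style reported by the target. A hedged sketch follows; the TTI reference is assumed to be in scope, and the exact set of TailFoldingStyle enumerators shown is an assumption about this header.

// Sketch: pick a predication strategy based on the target's preference.
TailFoldingStyle Style = TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true);
switch (Style) {
case TailFoldingStyle::None:
  // Keep a scalar epilogue; do not predicate the main loop body.
  break;
case TailFoldingStyle::DataWithEVL:
  // Fold the tail with predicated EVL instructions.
  break;
default:
  // Remaining styles predicate data and/or control flow with a lane mask.
  break;
}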
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override { return Impl.instCombineIntrinsic(IC, II); }
std::optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) override {
  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) override {
  return Impl.simplifyDemandedVectorEltsIntrinsic(
      IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
bool isLegalAddImmediate(int64_t Imm) override { return Impl.isLegalAddImmediate(Imm); }
bool isLegalAddScalableImmediate(int64_t Imm) override { return Impl.isLegalAddScalableImmediate(Imm); }
bool isLegalICmpImmediate(int64_t Imm) override { return Impl.isLegalICmpImmediate(Imm); }
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                           Instruction *I, int64_t ScalableOffset) override {
  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                    AddrSpace, I, ScalableOffset);
}
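Passes such as LoopStrengthReduce query this hook through the public TargetTransformInfo wrapper to ask whether a "base + offset + scale*index" address can be folded into a load or store. A hedged sketch of such a query; TTI, AccessTy, and GV are assumed to be supplied by the caller, and the trailing defaulted parameters are left at their defaults.

// Sketch: can the target fold "gv + 16 + 4*index" into the addressing mode of
// an access of AccessTy? Arguments mirror the parameters shown above.
bool Folds = TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/GV,
                                       /*BaseOffset=*/16, /*HasBaseReg=*/true,
                                       /*Scale=*/4, /*AddrSpace=*/0);
if (!Folds) {
  // Materialize the address with separate arithmetic instead.
}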
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) override { return Impl.isLSRCostLess(C1, C2); }
bool isNumRegsMajorCostOfLSR() override { return Impl.isNumRegsMajorCostOfLSR(); }
bool shouldDropLSRSolutionIfLessProfitable() const override { return Impl.shouldDropLSRSolutionIfLessProfitable(); }
bool isProfitableLSRChainElement(Instruction *I) override { return Impl.isProfitableLSRChainElement(I); }
bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) override { return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); }
getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override { return Impl.getPreferredAddressingMode(L, SE); }
bool isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); }
bool isLegalMaskedLoad(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedLoad(DataType, Alignment); }
bool isLegalNTStore(Type *DataType, Align Alignment) override { return Impl.isLegalNTStore(DataType, Alignment); }
bool isLegalNTLoad(Type *DataType, Align Alignment) override { return Impl.isLegalNTLoad(DataType, Alignment); }
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override { return Impl.isLegalBroadcastLoad(ElementTy, NumElements); }
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedScatter(DataType, Alignment); }
bool isLegalMaskedGather(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedGather(DataType, Alignment); }
bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) override { return Impl.forceScalarizeMaskedGather(DataType, Alignment); }
bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) override { return Impl.forceScalarizeMaskedScatter(DataType, Alignment); }
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedCompressStore(DataType, Alignment); }
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedExpandLoad(DataType, Alignment); }
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override { return Impl.isLegalStridedLoadStore(DataType, Alignment); }
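Vectorization clients typically consult the isLegalMasked* family before emitting the corresponding masked intrinsics, and fall back to scalarization otherwise. A hedged sketch; VecTy (a VectorType *), Alignment, and TTI are assumed to come from the surrounding code.

// Sketch: only use @llvm.masked.gather when the target reports it as legal for
// this element type and alignment, and does not request forced scalarization.
bool UseGather = TTI.isLegalMaskedGather(VecTy, Alignment) &&
                 !TTI.forceScalarizeMaskedGather(VecTy, Alignment);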
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                  unsigned AddrSpace) override {
  return Impl.isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace);
}
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override { return Impl.isLegalMaskedVectorHistogram(AddrType, DataType); }
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const override { return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask); }
bool enableOrderedReductions() override { return Impl.enableOrderedReductions(); }
bool hasDivRemOp(Type *DataType, bool IsSigned) override { return Impl.hasDivRemOp(DataType, IsSigned); }
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { return Impl.hasVolatileVariant(I, AddrSpace); }
bool prefersVectorizedAddressing() override { return Impl.prefersVectorizedAddressing(); }
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                     StackOffset BaseOffset, bool HasBaseReg,
                                     unsigned AddrSpace) override {
  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override { return Impl.isTruncateFree(Ty1, Ty2); }
bool isProfitableToHoist(Instruction *I) override { return Impl.isProfitableToHoist(I); }
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
unsigned getRegUsageForType(Type *Ty) override { return Impl.getRegUsageForType(Ty); }
bool shouldBuildLookupTables() override { return Impl.shouldBuildLookupTables(); }
bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); }
bool shouldBuildRelLookupTables() override { return Impl.shouldBuildRelLookupTables(); }
bool useColdCCForColdCall(Function &F) override { return Impl.useColdCCForColdCall(F); }
bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override { return Impl.isTargetIntrinsicTriviallyScalarizable(ID); }
unsigned ScalarOpdIdx) override { return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx); }
int OpdIdx) override { return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx); }
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) override { return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx); }
InstructionCost getScalarizationOverhead(VectorType *Ty,
                                         const APInt &DemandedElts,
                                         bool Insert, bool Extract,
                                         ArrayRef<Value *> VL = {}) override {
  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 ArrayRef<Type *> Tys,
  return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
bool supportsEfficientVectorElementLoadStore() override { return Impl.supportsEfficientVectorElementLoadStore(); }
bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
bool supportsTailCallFor(const CallBase *CB) override { return Impl.supportsTailCallFor(CB); }
bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); }
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override { return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp); }
bool enableSelectOptimize() override { return Impl.enableSelectOptimize(); }
bool shouldTreatInstructionLikeSelect(const Instruction *I) override { return Impl.shouldTreatInstructionLikeSelect(I); }
bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); }
bool enableMaskedInterleavedAccessVectorization() override { return Impl.enableMaskedInterleavedAccessVectorization(); }
bool isFPVectorizationPotentiallyUnsafe() override { return Impl.isFPVectorizationPotentiallyUnsafe(); }
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                    unsigned *Fast) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
bool isExpensiveToSpeculativelyExecute(const Instruction *I) override { return Impl.isExpensiveToSpeculativelyExecute(I); }
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); }
InstructionCost getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) override { return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); }
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
  return Impl.getIntImmCost(Imm, Ty, CostKind);
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  Instruction *Inst = nullptr) override {
  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
const APInt &Imm, Type *Ty,
  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const override { return Impl.preferToKeepConstantsAttached(Inst, Fn); }
unsigned getNumberOfRegisters(unsigned ClassID) const override { return Impl.getNumberOfRegisters(ClassID); }
bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override { return Impl.hasConditionalLoadStoreForType(Ty); }
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override { return Impl.getRegisterClassForType(Vector, Ty); }
const char *getRegisterClassName(unsigned ClassID) const override { return Impl.getRegisterClassName(ClassID); }
TypeSize getRegisterBitWidth(RegisterKind K) const override { return Impl.getRegisterBitWidth(K); }
unsigned getMinVectorRegisterBitWidth() const override { return Impl.getMinVectorRegisterBitWidth(); }
std::optional<unsigned> getMaxVScale() const override { return Impl.getMaxVScale(); }
std::optional<unsigned> getVScaleForTuning() const override { return Impl.getVScaleForTuning(); }
bool isVScaleKnownToBeAPowerOfTwo() const override { return Impl.isVScaleKnownToBeAPowerOfTwo(); }
bool shouldMaximizeVectorBandwidth(
  return Impl.shouldMaximizeVectorBandwidth(K);
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const override { return Impl.getMinimumVF(ElemWidth, IsScalable); }
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { return Impl.getMaximumVF(ElemWidth, Opcode); }
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override { return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); }
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { return Impl.shouldConsiderAddressTypePromotion(I, AllowPromotionWithoutCommonHeader); }
unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
std::optional<unsigned> getCacheSize(CacheLevel Level) const override { return Impl.getCacheSize(Level); }
std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const override { return Impl.getCacheAssociativity(Level); }
std::optional<unsigned> getMinPageSize() const override { return Impl.getMinPageSize(); }
unsigned getPrefetchDistance() const override { return Impl.getPrefetchDistance(); }
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                              unsigned NumStridedMemAccesses,
                              unsigned NumPrefetches, bool HasCall) const override {
  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                   NumPrefetches, HasCall);
}
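Software-prefetch insertion combines several of these hooks: the cache line size, how far ahead to prefetch, and the minimum stride worth prefetching. A hedged sketch of such a query sequence; the loop statistics passed in are illustrative placeholders and TTI is assumed to be in scope.

// Sketch: gather the target's prefetch tuning parameters.
unsigned LineSize  = TTI.getCacheLineSize();
unsigned Distance  = TTI.getPrefetchDistance();        // 0 means prefetching is disabled
unsigned MinStride = TTI.getMinPrefetchStride(/*NumMemAccesses=*/4,
                                              /*NumStridedMemAccesses=*/2,
                                              /*NumPrefetches=*/2,
                                              /*HasCall=*/false);
bool PrefetchingEnabled = Distance > 0 && LineSize != 0;
(void)MinStride; // streams with a smaller stride than this are not worth prefetching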
unsigned getMaxPrefetchIterationsAhead() const override { return Impl.getMaxPrefetchIterationsAhead(); }
bool enableWritePrefetching() const override { return Impl.enableWritePrefetching(); }
bool shouldPrefetchAddressSpace(unsigned AS) const override { return Impl.shouldPrefetchAddressSpace(AS); }
InstructionCost getPartialReductionCost(
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
    std::optional<unsigned> BinOp = std::nullopt) const override {
  return Impl.getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
                                      AccumType, VF, OpAExtend, OpBExtend,
unsigned getMaxInterleaveFactor(ElementCount VF) override { return Impl.getMaxInterleaveFactor(VF); }
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                          ProfileSummaryInfo *PSI,
                                          BlockFrequencyInfo *BFI) override {
  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
InstructionCost getArithmeticInstrCost(
    OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI = nullptr) override {
  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                const SmallBitVector &OpcodeMask,
  return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
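Cost-model clients compare the InstructionCost values returned by these hooks to decide between alternative lowerings. A hedged sketch comparing a vector add against its scalarized form; VecTy and ScalarTy are illustrative types assumed to be created elsewhere, and the InstructionCost arithmetic shown relies on its integer conversions.

// Sketch: compare one <4 x i32> add against four scalar adds at reciprocal
// throughput, and only vectorize if the vector form is both valid and cheaper.
InstructionCost VecCost =
    TTI.getArithmeticInstrCost(Instruction::Add, VecTy,
                               TargetTransformInfo::TCK_RecipThroughput);
InstructionCost ScalCost =
    TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy,
                               TargetTransformInfo::TCK_RecipThroughput);
ScalCost *= 4; // four scalar adds replace one vector add
bool Vectorize = VecCost.isValid() && VecCost < ScalCost;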
ArrayRef<const Value *> Args,
const Instruction *CxtI) override {
  return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) override {
  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                         unsigned Index) override {
  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
const Instruction *I = nullptr) override {
  return Impl.getCFInstrCost(Opcode, CostKind, I);
}
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   OperandValueInfo Op1Info,
                                   OperandValueInfo Op2Info,
                                   const Instruction *I) override {
  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                 Op1Info, Op2Info, I);
}
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                   unsigned Index, Value *Op0,
                                   Value *Op1) override {
  return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
}
InstructionCost getVectorInstrCost(
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) override {
  return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar,
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                   unsigned Index) override {
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                          const APInt &DemandedDstElts,
  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                OperandValueInfo OpInfo,
                                const Instruction *I) override {
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  const Instruction *I) override {
  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       const Instruction *I = nullptr) override {
  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       bool VariableMask, Align Alignment,
                       const Instruction *I = nullptr) override {
  return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
InstructionCost getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    bool UseMaskForCond, bool UseMaskForGaps) override {
  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                         UseMaskForCond, UseMaskForGaps);
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                           std::optional<FastMathFlags> FMF,
  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
  return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
  return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  return Impl.getIntrinsicInstrCost(ICA, CostKind);
InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                 ArrayRef<Type *> Tys,
unsigned getNumberOfParts(Type *Tp) override { return Impl.getNumberOfParts(Tp); }
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) override { return Impl.getAddressComputationCost(Ty, SE, Ptr); }
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { return Impl.getCostOfKeepingLiveOverCall(Tys); }
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) override { return Impl.getTgtMemIntrinsic(Inst, Info); }
unsigned getAtomicMemIntrinsicMaxElementSize() const override { return Impl.getAtomicMemIntrinsicMaxElementSize(); }
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); }
Type *getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicElementSize) const override {
  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                        DestAddrSpace, SrcAlign, DestAlign,
void getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicCpySize) const override {
  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                         SrcAddrSpace, DestAddrSpace,
                                         SrcAlign, DestAlign, AtomicCpySize);
}
const Function *Callee) const override {
  return Impl.areInlineCompatible(Caller, Callee);
}
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                              unsigned DefaultCallPenalty) const override {
  return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
}
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                           const ArrayRef<Type *> &Types) const override {
  return Impl.areTypesABICompatible(Caller, Callee, Types);
}
  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { return Impl.getLoadStoreVecRegBitWidth(AddrSpace); }
bool isLegalToVectorizeLoad(LoadInst *LI) const override { return Impl.isLegalToVectorizeLoad(LI); }
bool isLegalToVectorizeStore(StoreInst *SI) const override { return Impl.isLegalToVectorizeStore(SI); }
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                 unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const override {
  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override { return Impl.isLegalToVectorizeReduction(RdxDesc, VF); }
bool isElementTypeLegalForScalableVector(Type *Ty) const override { return Impl.isElementTypeLegalForScalableVector(Ty); }
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                             unsigned ChainSizeInBytes,
  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                              unsigned ChainSizeInBytes,
  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
bool preferFixedOverScalableIfEqualCost() const override { return Impl.preferFixedOverScalableIfEqualCost(); }
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const override { return Impl.preferInLoopReduction(Opcode, Ty, Flags); }
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const override { return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); }
bool preferEpilogueVectorization() const override { return Impl.preferEpilogueVectorization(); }
bool shouldExpandReduction(const IntrinsicInst *II) const override { return Impl.shouldExpandReduction(II); }
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override { return Impl.getPreferredExpandedReductionShuffle(II); }
unsigned getGISelRematGlobalCost() const override { return Impl.getGISelRematGlobalCost(); }
unsigned getMinTripCountTailFoldingThreshold() const override { return Impl.getMinTripCountTailFoldingThreshold(); }
bool supportsScalableVectors() const override { return Impl.supportsScalableVectors(); }
bool enableScalableVectorization() const override { return Impl.enableScalableVectorization(); }
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const override { return Impl.hasActiveVectorLength(Opcode, DataType, Alignment); }
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl<Use *> &Ops) const override { return Impl.isProfitableToSinkOperands(I, Ops); }
bool isVectorShiftByScalarCheap(Type *Ty) const override { return Impl.isVectorShiftByScalarCheap(Ty); }
getVPLegalizationStrategy(const VPIntrinsic &PI) const override { return Impl.getVPLegalizationStrategy(PI); }
bool hasArmWideBranch(bool Thumb) const override { return Impl.hasArmWideBranch(Thumb); }
unsigned getMaxNumArgs() const override { return Impl.getMaxNumArgs(); }
unsigned getNumBytesToPadGlobalArray(unsigned Size,
template <typename T>
: TTIImpl(new Model<T>(Impl)) {}
: TTICallback(Arg.TTICallback) {}
: TTICallback(std::move(Arg.TTICallback)) {}
TTICallback = RHS.TTICallback;
TTICallback = std::move(RHS.TTICallback);
std::optional<TargetTransformInfo> TTI;
virtual void anchor();
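In the new pass manager, TargetIRAnalysis is the analysis that hands a per-function TargetTransformInfo to passes. A hedged sketch of a pass querying it; "ExampleCostPass" is an illustrative name, not an LLVM pass, and the usual PassManager.h include is assumed.

// Sketch: obtain TargetTransformInfo through TargetIRAnalysis and ask one of
// the hooks declared above.
struct ExampleCostPass : PassInfoMixin<ExampleCostPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
    bool Scalable = TTI.supportsScalableVectors(); // example query
    (void)Scalable; // ... drive cost-based decisions with the other hooks ...
    return PreservedAnalyses::all();
  }
};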