#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
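// TargetTransformInfo (TTI) exposes target-specific cost and legality queries
// (instruction costs, legal addressing modes, register and vector widths,
// prefetching parameters, ...) to IR-level passes such as the inliner, the
// unroller and the vectorizers, without making them depend on codegen.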
class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;
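// IntrinsicCostAttributes bundles the information needed to cost an intrinsic
// call: the intrinsic ID, return type, argument values/types, fast-math flags
// and an optional precomputed scalarization cost.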
  Type *RetTy = nullptr;
  bool TypeBasedOnly = false);

class TargetTransformInfo;
  static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
      const PointersChainInfo &Info, Type *AccessTy,
  std::pair<const Value *, unsigned>
      KnownBits &Known, bool &KnownBitsComputed) const;
      SimplifyAndSetOp) const;
      bool HasBaseReg, int64_t Scale,
      int64_t ScalableOffset = 0) const;
      Align Alignment, unsigned AddrSpace) const;
      unsigned AddrSpace = 0) const;
      unsigned ScalarOpdIdx) const;
      const APInt &DemandedElts, bool Insert, bool Extract,
      ArrayRef<Type *> Tys,
      bool IsZeroCmp) const;
      unsigned *Fast = nullptr) const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
      Type *ScalarValTy) const;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
      unsigned NumStridedMemAccesses, unsigned NumPrefetches,
      bool HasCall) const;
      std::optional<unsigned> BinOp = std::nullopt) const;
      unsigned Opcode, Type *Ty,
      ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr,
      const TargetLibraryInfo *TLibInfo = nullptr) const;
      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
      const SmallBitVector &OpcodeMask,
      ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr) const;
      unsigned Index) const;
      const Instruction *I = nullptr) const;
      unsigned Index = -1, Value *Op0 = nullptr, Value *Op1 = nullptr) const;
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const;
      unsigned Index = -1) const;
      const APInt &DemandedDstElts,
      const Instruction *I = nullptr) const;
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
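  // The helper below reports whether an FP reduction must be kept in source
  // order: ordered reductions are required whenever the fast-math flags do
  // not permit reassociation.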
    return FMF && !(*FMF).allowReassoc();
      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
      const SCEV *Ptr = nullptr) const;
      Type *ExpectedType) const;
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
      unsigned DefaultCallPenalty) const;
      unsigned AddrSpace) const;
      unsigned AddrSpace) const;
      unsigned ChainSizeInBytes,
      unsigned ChainSizeInBytes,
      Align Alignment) const;
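  // Sketch (assumed usage, not part of this header): a transform asking TTI
  // whether a vector FP add is cheap enough, using only queries declared
  // above; `VecTy` and `Budget` are hypothetical local values.
  //
  //   InstructionCost Cost = TTI.getArithmeticInstrCost(
  //       Instruction::FAdd, VecTy, TargetTransformInfo::TCK_RecipThroughput);
  //   if (Cost.isValid() && Cost <= Budget)
  //     vectorize();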
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
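  // Type-erasure machinery: `Concept` is the abstract interface sketched
  // below, `Model<T>` adapts a concrete target implementation to it, and
  // `TTIImpl` owns the active instance. This keeps the public
  // TargetTransformInfo API non-virtual while still dispatching to
  // target-specific code.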
  virtual std::pair<const Value *, unsigned>
      Value *NewV) const = 0;
      KnownBits &Known, bool &KnownBitsComputed) = 0;
      SimplifyAndSetOp) = 0;
      int64_t BaseOffset, bool HasBaseReg,
      int64_t Scale, unsigned AddrSpace,
      int64_t ScalableOffset) = 0;
      Align Alignment) = 0;
      Align Alignment) = 0;
      unsigned AddrSpace) = 0;
      bool HasBaseReg, int64_t Scale,
      unsigned AddrSpace) = 0;
      unsigned ScalarOpdIdx) = 0;
      unsigned *Fast) = 0;
      Type *Ty = nullptr) const = 0;
      bool IsScalable) const = 0;
  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
      Type *ScalarValTy) const = 0;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
      unsigned NumStridedMemAccesses, unsigned NumPrefetches,
      bool HasCall) const = 0;
      std::optional<unsigned> BinOp) const = 0;
      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
      unsigned Index) = 0;
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) = 0;
      unsigned Index) = 0;
      const APInt &DemandedDstElts,
      bool VariableMask, Align Alignment,
      bool VariableMask, Align Alignment,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
      std::optional<FastMathFlags> FMF,
      Type *ExpectedType) = 0;
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize) const = 0;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      std::optional<uint32_t> AtomicCpySize) const = 0;
      unsigned DefaultCallPenalty) const = 0;
      unsigned AddrSpace) const = 0;
      unsigned AddrSpace) const = 0;
      unsigned ChainSizeInBytes,
      unsigned ChainSizeInBytes,
      Align Alignment) const = 0;
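  // Model<T> simply forwards every Concept method to the wrapped target
  // implementation `Impl` (typically a target's TargetTransformInfoImpl
  // subclass), as the overrides below show.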
template <typename T>
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType,
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const PointersChainInfo &Info,
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
  }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
  }
  int getInliningLastCallToStaticBonus() const override {
    return Impl.getInliningLastCallToStaticBonus();
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return Impl.getCallerAllocaCost(CB, AI);
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
  }
  InstructionCost getInstructionCost(const User *U,
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  InstructionCost getBranchMispredictPenalty() override {
    return Impl.getBranchMispredictPenalty();
  }
  bool hasBranchDivergence(const Function *F = nullptr) override {
    return Impl.hasBranchDivergence(F);
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return Impl.addrspacesMayAlias(AS0, AS1);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  unsigned getEpilogueVectorizationMinVF() override {
    return Impl.getEpilogueVectorizationMinVF();
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalAddScalableImmediate(int64_t Imm) override {
    return Impl.isLegalAddScalableImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I, int64_t ScalableOffset) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I, ScalableOffset);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool shouldDropLSRSolutionIfLessProfitable() const override {
    return Impl.shouldDropLSRSolutionIfLessProfitable();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedCompressStore(DataType, Alignment);
  }
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
  }
  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalStridedLoadStore(DataType, Alignment);
  }
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    unsigned AddrSpace) override {
    return Impl.isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace);
  }
  bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
    return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override {
    return Impl.isTargetIntrinsicTriviallyScalarizable(ID);
  }
      unsigned ScalarOpdIdx) override {
    return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
  }
      int OpdIdx) override {
    return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
  }
  bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                        int RetIdx) override {
    return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           ArrayRef<Value *> VL = {}) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
    return Impl.shouldTreatInstructionLikeSelect(I);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned *Fast) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
      const APInt &Imm, Type *Ty,
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const override {
    return Impl.preferToKeepConstantsAttached(Inst, Fn);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
    return Impl.hasConditionalLoadStoreForType(Ty);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(
    return Impl.shouldMaximizeVectorBandwidth(K);
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  std::optional<unsigned> getMinPageSize() const override {
    return Impl.getMinPageSize();
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      std::optional<unsigned> BinOp = std::nullopt) const override {
    return Impl.getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
                                        AccumType, VF, OpAExtend, OpBExtend,
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
      const SmallBitVector &OpcodeMask,
    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
      ArrayRef<const Value *> Args,
      const Instruction *CxtI) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
      const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     OperandValueInfo Op1Info,
                                     OperandValueInfo Op2Info,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                   Op1Info, Op2Info, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar,
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     unsigned Index) override {
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         const Instruction *I = nullptr) override {
    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           UseMaskForCond, UseMaskForGaps);
  }
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
      const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override {
    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  bool preferFixedOverScalableIfEqualCost() const override {
    return Impl.preferFixedOverScalableIfEqualCost();
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
    return Impl.getPreferredExpandedReductionShuffle(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override {
    return Impl.isProfitableToSinkOperands(I, Ops);
  }
  bool isVectorShiftByScalarCheap(Type *Ty) const override {
    return Impl.isVectorShiftByScalarCheap(Ty);
  }
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  uint64_t getFeatureMask(const Function &F) const override {
    return Impl.getFeatureMask(F);
  }
  bool isMultiversionedFunction(const Function &F) const override {
    return Impl.isMultiversionedFunction(F);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }
  unsigned getNumBytesToPadGlobalArray(unsigned Size,
template <typename T>
    : TTIImpl(new Model<T>(Impl)) {}
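// Sketch (assumed usage, not part of this header): a backend's TargetMachine
// typically builds its TTI by passing a concrete implementation object to the
// constructor above; `MyTargetTTIImpl` and `MyTargetMachine` are hypothetical.
//
//   TargetTransformInfo
//   MyTargetMachine::getTargetTransformInfo(const Function &F) const {
//     return TargetTransformInfo(MyTargetTTIImpl(this, F));
//   }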
    : TTICallback(Arg.TTICallback) {}
    : TTICallback(std::move(Arg.TTICallback)) {}
  TTICallback = RHS.TTICallback;
  TTICallback = std::move(RHS.TTICallback);
  std::optional<TargetTransformInfo> TTI;

  virtual void anchor();
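// Sketch (assumed usage, not part of this header): obtaining TTI inside a
// new-pass-manager function pass via TargetIRAnalysis; `MyPass` is a
// hypothetical pass name.
//
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
//     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//     if (TTI.supportsScalableVectors()) {
//       // ... make a target-dependent decision ...
//     }
//     return PreservedAnalyses::all();
//   }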