AMDGPUTargetTransformInfo.h
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

class AMDGPUTargetMachine;
class ScalarEvolution;
class SITargetLowering;

  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;

  static inline int getFullRateInstrCost() {
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign,
                                  Optional<uint32_t> AtomicElementSize) const;
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      Optional<uint32_t> AtomicCpySize) const;

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Represents a single loop in the control flow graph.
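For context, a minimal sketch of how a loop transform might consult these unrolling and peeling hooks through the public TargetTransformInfo wrapper. The function and variable names are illustrative only, and the analyses are assumed to come from the pass manager; real callers first populate the preference structs with the pass's own defaults.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

static void queryLoopHints(Loop *L, ScalarEvolution &SE,
                           const TargetTransformInfo &TTI,
                           OptimizationRemarkEmitter &ORE) {
  TargetTransformInfo::UnrollingPreferences UP;
  // The target may adjust fields such as UP.Threshold or UP.Partial.
  TTI.getUnrollingPreferences(L, SE, UP, &ORE);

  TargetTransformInfo::PeelingPreferences PP;
  // The target may adjust fields such as PP.PeelCount.
  TTI.getPeelingPreferences(L, SE, PP);
}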
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicCpySize) const
bool isSourceOfDivergence(const Value *V) const
The main scalar evolution driver.
Triple - Helper class for working with autoconf configuration names.
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getMaxInterleaveFactor(unsigned VF)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Container class for subtarget features.
unsigned getNumberOfRegisters(unsigned RCID) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
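This helper comes from llvm/Support/MathExtras.h and is constexpr, so its behavior can be checked at compile time. A couple of quick examples:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isPowerOf2_32(64), "powers of two return true");
static_assert(!llvm::isPowerOf2_32(0), "zero is not a power of two > 0");
static_assert(!llvm::isPowerOf2_32(48), "non-powers of two return false");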
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
unsigned getMinVectorRegisterBitWidth() const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
LOCAL_ADDRESS
Address space for local memory.
unsigned getInliningThresholdMultiplier()
bool isAlwaysUniform(const Value *V) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
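A small usage sketch of the Align type (defined in llvm/Support/Alignment.h), as it appears in the isLegalToVectorize* hooks in this header:

#include "llvm/Support/Alignment.h"
#include <cstdint>

static void alignExample() {
  llvm::Align A(16);                  // must be a non-zero power of two
  uint64_t Bytes = A.value();         // 16
  unsigned ShiftAmt = llvm::Log2(A);  // 4
  (void)Bytes;
  (void)ShiftAmt;
}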
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Base class of all SIMD vector types.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
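A sketch of querying this hook for a target-specific memory intrinsic through TargetTransformInfo; the field checks are only an illustration of the MemIntrinsicInfo result, not code from this header.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool touchesMemory(const TargetTransformInfo &TTI, IntrinsicInst *II) {
  MemIntrinsicInfo Info;
  if (!TTI.getTgtMemIntrinsic(II, Info))
    return false; // not a recognized target memory intrinsic
  return Info.ReadMem || Info.WriteMem;
}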
bool hasBranchDivergence()
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=None)
This is an important class for using LLVM in a threaded context.
Base class which can be used to help build a TTI implementation.
int getInlinerVectorBonusPercent()
InstructionCost getVectorSplitCost()
PRIVATE_ADDRESS
Address space for private memory.
Class for arbitrary precision integers.
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
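A hedged sketch of how a client (for example, a load/store vectorization pass) might query this hook through the public TargetTransformInfo wrapper; the helper name is made up for illustration.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

static bool chainIsVectorizable(const TargetTransformInfo &TTI,
                                unsigned ChainSizeInBytes, Align Alignment,
                                unsigned AddrSpace) {
  // Forwards to the target's implementation, e.g. GCNTTIImpl on AMDGPU.
  return TTI.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
}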
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const
TargetSubtargetInfo - Generic base class for all target subtargets.
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicElementSize) const
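These two hooks drive how a memcpy is expanded into a load/store loop plus a residual tail. The sketch below shows the shape of a query through TargetTransformInfo; the address spaces, alignments, and byte counts are placeholder values, not values taken from this header.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static void pickMemcpyLoweringTypes(const TargetTransformInfo &TTI,
                                    LLVMContext &Ctx, Value *Length) {
  // Element type for the main copy loop; on AMDGPU this may be a vector type
  // such as <4 x i32> when the alignment allows it (an assumption, not a quote).
  Type *LoopOpTy = TTI.getMemcpyLoopLoweringType(
      Ctx, Length, /*SrcAddrSpace=*/1, /*DestAddrSpace=*/1,
      /*SrcAlign=*/4, /*DestAlign=*/4, /*AtomicElementSize=*/None);

  // Types used to copy the bytes left over after the main loop.
  SmallVector<Type *, 8> ResidualTys;
  TTI.getMemcpyLoopResidualLoweringType(
      ResidualTys, Ctx, /*RemainingBytes=*/7, /*SrcAddrSpace=*/1,
      /*DestAddrSpace=*/1, /*SrcAlign=*/4, /*DestAlign=*/4,
      /*AtomicCpySize=*/None);
  (void)LoopOpTy;
}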
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
REGION_ADDRESS
Address space for region memory. (GDS)
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
unsigned adjustInliningThreshold(const CallBase *CB) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
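A minimal cost-query sketch against this hook via TargetTransformInfo; the opcode and cost kind are just one illustrative choice.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static InstructionCost costOfFAdd(const TargetTransformInfo &TTI, Type *FltTy) {
  // Reciprocal-throughput cost of an fadd of the given type on this target.
  return TTI.getArithmeticInstrCost(Instruction::FAdd, FltTy,
                                    TargetTransformInfo::TCK_RecipThroughput);
}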
unsigned getFlatAddressSpace() const
The core instruction combiner logic.
A wrapper class for inspecting calls to intrinsic functions.
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
FLAT_ADDRESS
Address space for flat memory.
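The address-space enumerators above live in the AMDGPU backend's AMDGPUAS namespace. As a hedged sketch, checking a pointer's address space against one of them might look like the following; the enumerator is passed in as a parameter here to avoid hard-coding backend headers.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"

static bool isPointerToAddrSpace(const llvm::Value *V, unsigned AS) {
  // For AMDGPU local memory, AS would be AMDGPUAS::LOCAL_ADDRESS.
  auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(V->getType());
  return PtrTy && PtrTy->getAddressSpace() == AS;
}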
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Information about a load/store intrinsic defined by the target.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
bool useGPUDivergenceAnalysis() const
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, InstCombiner &IC) const
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
LLVM Value Representation.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index)