LLVM 20.0.0git
|
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include <optional>
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "AMDGPUtti" |
Functions | |
static bool | dependsOnLocalPhi (const Loop *L, const Value *Cond, unsigned Depth=0) |
static bool | intrinsicHasPackedVectorBenefit (Intrinsic::ID ID) |
static unsigned | adjustInliningThresholdUsingCallee (const CallBase *CB, const SITargetLowering *TLI, const GCNTTIImpl *TTIImpl) |
static unsigned | getCallArgsTotalAllocaSize (const CallBase *CB, const DataLayout &DL) |
Variables | |
static cl::opt< unsigned > | UnrollThresholdPrivate ("amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), cl::init(2700), cl::Hidden) |
static cl::opt< unsigned > | UnrollThresholdLocal ("amdgpu-unroll-threshold-local", cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"), cl::init(1000), cl::Hidden) |
static cl::opt< unsigned > | UnrollThresholdIf ("amdgpu-unroll-threshold-if", cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), cl::init(200), cl::Hidden) |
static cl::opt< bool > | UnrollRuntimeLocal ("amdgpu-unroll-runtime-local", cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"), cl::init(true), cl::Hidden) |
static cl::opt< unsigned > | UnrollMaxBlockToAnalyze ("amdgpu-unroll-max-block-to-analyze", cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"), cl::init(32), cl::Hidden) |
static cl::opt< unsigned > | ArgAllocaCost ("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000), cl::desc("Cost of alloca argument")) |
static cl::opt< unsigned > | ArgAllocaCutoff ("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), cl::desc("Maximum alloca size to use for inline cost")) |
static cl::opt< size_t > | InlineMaxBB ("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)")) |
static cl::opt< unsigned > | MemcpyLoopUnroll ("amdgpu-memcpy-loop-unroll", cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop"), cl::init(16), cl::Hidden) |
#define DEBUG_TYPE "AMDGPUtti"
Definition at line 33 of file AMDGPUTargetTransformInfo.cpp.
|
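static unsigned adjustInliningThresholdUsingCallee (const CallBase *CB, const SITargetLowering *TLI, const GCNTTIImpl *TTIImpl)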
Definition at line 1264 of file AMDGPUTargetTransformInfo.cpp.
References A, llvm::CallBase::args(), llvm::ComputeValueVTs(), DL, llvm::CallBase::getArgOperandNo(), llvm::CallBase::getCallingConv(), llvm::Value::getContext(), llvm::TargetTransformInfoImplBase::getDataLayout(), llvm::InlineConstants::getInstrCost(), llvm::Type::getInt32Ty(), llvm::SITargetLowering::getNumRegistersForCallingConv(), llvm::InstructionCost::getValue(), llvm::AMDGPU::isArgPassedInSGPR(), llvm::AMDGPUAS::PRIVATE_ADDRESS, and llvm::TargetTransformInfo::TCK_SizeAndLatency.
Referenced by llvm::GCNTTIImpl::adjustInliningThreshold().
static bool dependsOnLocalPhi (const Loop *L, const Value *Cond, unsigned Depth=0)
Definition at line 85 of file AMDGPUTargetTransformInfo.cpp.
References Cond, dependsOnLocalPhi(), llvm::Depth, I, llvm::none_of(), and PHI.
Referenced by dependsOnLocalPhi(), and llvm::AMDGPUTTIImpl::getUnrollingPreferences().
|
static unsigned getCallArgsTotalAllocaSize (const CallBase *CB, const DataLayout &DL)
Definition at line 1310 of file AMDGPUTargetTransformInfo.cpp.
References llvm::CallBase::args(), DL, llvm::AMDGPUAS::FLAT_ADDRESS, llvm::AllocaInst::getAllocatedType(), llvm::getUnderlyingObject(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::AllocaInst::isStaticAlloca(), and llvm::AMDGPUAS::PRIVATE_ADDRESS.
Referenced by llvm::GCNTTIImpl::adjustInliningThreshold(), and llvm::GCNTTIImpl::getCallerAllocaCost().
|
static bool intrinsicHasPackedVectorBenefit (Intrinsic::ID ID)
Definition at line 707 of file AMDGPUTargetTransformInfo.cpp.
Referenced by llvm::GCNTTIImpl::getIntrinsicInstrCost().
|
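static cl::opt< unsigned > ArgAllocaCost ("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000), cl::desc("Cost of alloca argument"))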
Referenced by llvm::GCNTTIImpl::adjustInliningThreshold(), and llvm::GCNTTIImpl::getCallerAllocaCost().
|
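static cl::opt< unsigned > ArgAllocaCutoff ("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), cl::desc("Maximum alloca size to use for inline cost"))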
Referenced by llvm::GCNTTIImpl::getCallerAllocaCost().
|
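static cl::opt< size_t > InlineMaxBB ("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)"))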
Referenced by llvm::GCNTTIImpl::areInlineCompatible().
|
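static cl::opt< unsigned > MemcpyLoopUnroll ("amdgpu-memcpy-loop-unroll", cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop"), cl::init(16), cl::Hidden)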
Referenced by llvm::GCNTTIImpl::getMemcpyLoopLoweringType().
|
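static cl::opt< unsigned > UnrollMaxBlockToAnalyze ("amdgpu-unroll-max-block-to-analyze", cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"), cl::init(32), cl::Hidden)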
Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().
|
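static cl::opt< bool > UnrollRuntimeLocal ("amdgpu-unroll-runtime-local", cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"), cl::init(true), cl::Hidden)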
Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().
|
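static cl::opt< unsigned > UnrollThresholdIf ("amdgpu-unroll-threshold-if", cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), cl::init(200), cl::Hidden)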
Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().
|
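static cl::opt< unsigned > UnrollThresholdLocal ("amdgpu-unroll-threshold-local", cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"), cl::init(1000), cl::Hidden)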
Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().
|
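static cl::opt< unsigned > UnrollThresholdPrivate ("amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), cl::init(2700), cl::Hidden)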
Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().