LLVM 20.0.0git
Macros | Functions | Variables
AMDGPUTargetTransformInfo.cpp File Reference
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include <optional>

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "AMDGPUtti"
 

Functions

static bool dependsOnLocalPhi (const Loop *L, const Value *Cond, unsigned Depth=0)
 
static bool intrinsicHasPackedVectorBenefit (Intrinsic::ID ID)
 
static unsigned adjustInliningThresholdUsingCallee (const CallBase *CB, const SITargetLowering *TLI, const GCNTTIImpl *TTIImpl)
 
static unsigned getCallArgsTotalAllocaSize (const CallBase *CB, const DataLayout &DL)
 

Variables

static cl::opt< unsigned > UnrollThresholdPrivate ("amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), cl::init(2700), cl::Hidden)
 
static cl::opt< unsigned > UnrollThresholdLocal ("amdgpu-unroll-threshold-local", cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"), cl::init(1000), cl::Hidden)
 
static cl::opt< unsigned > UnrollThresholdIf ("amdgpu-unroll-threshold-if", cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), cl::init(200), cl::Hidden)
 
static cl::opt< bool > UnrollRuntimeLocal ("amdgpu-unroll-runtime-local", cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"), cl::init(true), cl::Hidden)
 
static cl::opt< unsigned > UnrollMaxBlockToAnalyze ("amdgpu-unroll-max-block-to-analyze", cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"), cl::init(32), cl::Hidden)
 
static cl::opt< unsigned > ArgAllocaCost ("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000), cl::desc("Cost of alloca argument"))
 
static cl::opt< unsigned > ArgAllocaCutoff ("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), cl::desc("Maximum alloca size to use for inline cost"))
 
static cl::opt< size_t > InlineMaxBB ("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)"))
 
static cl::opt< unsigned > MemcpyLoopUnroll ("amdgpu-memcpy-loop-unroll", cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop"), cl::init(16), cl::Hidden)
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "AMDGPUtti"

Definition at line 33 of file AMDGPUTargetTransformInfo.cpp.

Function Documentation

◆ adjustInliningThresholdUsingCallee()

static unsigned adjustInliningThresholdUsingCallee ( const CallBase *CB,
const SITargetLowering *TLI,
const GCNTTIImpl *TTIImpl 
)
static

◆ dependsOnLocalPhi()

static bool dependsOnLocalPhi ( const Loop *L,
const Value *Cond,
unsigned  Depth = 0 
)
static

◆ getCallArgsTotalAllocaSize()

static unsigned getCallArgsTotalAllocaSize ( const CallBase *CB,
const DataLayout &DL 
)
static

◆ intrinsicHasPackedVectorBenefit()

static bool intrinsicHasPackedVectorBenefit ( Intrinsic::ID  ID)
static

Variable Documentation

◆ ArgAllocaCost

cl::opt< unsigned > ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000), cl::desc("Cost of alloca argument")) ( "amdgpu-inline-arg-alloca-cost"  ,
cl::Hidden  ,
cl::init(4000)  ,
cl::desc("Cost of alloca argument")   
)
static

◆ ArgAllocaCutoff

cl::opt< unsigned > ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), cl::desc("Maximum alloca size to use for inline cost")) ( "amdgpu-inline-arg-alloca-cutoff"  ,
cl::Hidden  ,
cl::init(256)  ,
cl::desc("Maximum alloca size to use for inline cost")   
)
static

◆ InlineMaxBB

cl::opt< size_t > InlineMaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)")) ( "amdgpu-inline-max-bb"  ,
cl::Hidden  ,
cl::init(1100)  ,
cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)")   
)
static

◆ MemcpyLoopUnroll

cl::opt< unsigned > MemcpyLoopUnroll("amdgpu-memcpy-loop-unroll", cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop"), cl::init(16), cl::Hidden) ( "amdgpu-memcpy-loop-unroll"  ,
cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop")  ,
cl::init(16)  ,
cl::Hidden   
)
static

◆ UnrollMaxBlockToAnalyze

cl::opt< unsigned > UnrollMaxBlockToAnalyze("amdgpu-unroll-max-block-to-analyze", cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"), cl::init(32), cl::Hidden) ( "amdgpu-unroll-max-block-to-analyze"  ,
cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU")  ,
cl::init(32)  ,
cl::Hidden   
)
static

◆ UnrollRuntimeLocal

cl::opt< bool > UnrollRuntimeLocal("amdgpu-unroll-runtime-local", cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"), cl::init(true), cl::Hidden) ( "amdgpu-unroll-runtime-local"  ,
cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ UnrollThresholdIf

cl::opt< unsigned > UnrollThresholdIf("amdgpu-unroll-threshold-if", cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), cl::init(200), cl::Hidden) ( "amdgpu-unroll-threshold-if"  ,
cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop")  ,
cl::init(200)  ,
cl::Hidden   
)
static

◆ UnrollThresholdLocal

cl::opt< unsigned > UnrollThresholdLocal("amdgpu-unroll-threshold-local", cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"), cl::init(1000), cl::Hidden) ( "amdgpu-unroll-threshold-local"  ,
cl::desc("Unroll threshold for AMDGPU if local memory used in a loop")  ,
cl::init(1000)  ,
cl::Hidden   
)
static

◆ UnrollThresholdPrivate

cl::opt< unsigned > UnrollThresholdPrivate("amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), cl::init(2700), cl::Hidden) ( "amdgpu-unroll-threshold-private"  ,
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop")  ,
cl::init(2700)  ,
cl::Hidden   
)
static