#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include <optional>

Macros
#define	DEBUG_TYPE "AMDGPUtti"

Functions
static bool	dependsOnLocalPhi (const Loop *L, const Value *Cond, unsigned Depth=0)

static bool	intrinsicHasPackedVectorBenefit (Intrinsic::ID ID)

static unsigned	adjustInliningThresholdUsingCallee (const CallBase *CB, const SITargetLowering *TLI, const GCNTTIImpl *TTIImpl)

static unsigned	getCallArgsTotalAllocaSize (const CallBase *CB, const DataLayout &DL)

Variables
static cl::opt< unsigned >	UnrollThresholdPrivate ("amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), cl::init(2700), cl::Hidden)

static cl::opt< unsigned >	UnrollThresholdLocal ("amdgpu-unroll-threshold-local", cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"), cl::init(1000), cl::Hidden)

static cl::opt< unsigned >	UnrollThresholdIf ("amdgpu-unroll-threshold-if", cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), cl::init(200), cl::Hidden)

static cl::opt< bool >	UnrollRuntimeLocal ("amdgpu-unroll-runtime-local", cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"), cl::init(true), cl::Hidden)

static cl::opt< unsigned >	UnrollMaxBlockToAnalyze ("amdgpu-unroll-max-block-to-analyze", cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"), cl::init(32), cl::Hidden)

static cl::opt< unsigned >	ArgAllocaCost ("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000), cl::desc("Cost of alloca argument"))

static cl::opt< unsigned >	ArgAllocaCutoff ("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), cl::desc("Maximum alloca size to use for inline cost"))

static cl::opt< size_t >	InlineMaxBB ("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)"))

static cl::opt< unsigned >	MemcpyLoopUnroll ("amdgpu-memcpy-loop-unroll", cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop"), cl::init(16), cl::Hidden)

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "AMDGPUtti"

Definition at line 33 of file AMDGPUTargetTransformInfo.cpp.

Function Documentation

◆ adjustInliningThresholdUsingCallee()

static unsigned adjustInliningThresholdUsingCallee	(	const CallBase *	CB,
		const SITargetLowering *	TLI,
		const GCNTTIImpl *	TTIImpl
	)

static

Definition at line 1264 of file AMDGPUTargetTransformInfo.cpp.

References A, llvm::CallBase::args(), llvm::ComputeValueVTs(), DL, llvm::CallBase::getArgOperandNo(), llvm::CallBase::getCallingConv(), llvm::Value::getContext(), llvm::TargetTransformInfoImplBase::getDataLayout(), llvm::InlineConstants::getInstrCost(), llvm::Type::getInt32Ty(), llvm::SITargetLowering::getNumRegistersForCallingConv(), llvm::InstructionCost::getValue(), llvm::AMDGPU::isArgPassedInSGPR(), llvm::AMDGPUAS::PRIVATE_ADDRESS, and llvm::TargetTransformInfo::TCK_SizeAndLatency.

Referenced by llvm::GCNTTIImpl::adjustInliningThreshold().

◆ dependsOnLocalPhi()

static bool dependsOnLocalPhi	(	const Loop *	L,
		const Value *	Cond,
		unsigned	Depth = 0
	)

static

Definition at line 85 of file AMDGPUTargetTransformInfo.cpp.

References Cond, dependsOnLocalPhi(), llvm::Depth, I, llvm::none_of(), and PHI.

Referenced by dependsOnLocalPhi(), and llvm::AMDGPUTTIImpl::getUnrollingPreferences().

◆ getCallArgsTotalAllocaSize()

static unsigned getCallArgsTotalAllocaSize	(	const CallBase *	CB,
		const DataLayout &	DL
	)

static

Definition at line 1310 of file AMDGPUTargetTransformInfo.cpp.

References llvm::CallBase::args(), DL, llvm::AMDGPUAS::FLAT_ADDRESS, llvm::AllocaInst::getAllocatedType(), llvm::getUnderlyingObject(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::AllocaInst::isStaticAlloca(), and llvm::AMDGPUAS::PRIVATE_ADDRESS.

Referenced by llvm::GCNTTIImpl::adjustInliningThreshold(), and llvm::GCNTTIImpl::getCallerAllocaCost().

◆ intrinsicHasPackedVectorBenefit()

static bool intrinsicHasPackedVectorBenefit ( Intrinsic::ID ID )

static

Definition at line 707 of file AMDGPUTargetTransformInfo.cpp.

Referenced by llvm::GCNTTIImpl::getIntrinsicInstrCost().

Variable Documentation

◆ ArgAllocaCost

cl::opt< unsigned > ArgAllocaCost	(	"amdgpu-inline-arg-alloca-cost"	,
		cl::Hidden	,
		cl::init(4000)	,
		cl::desc("Cost of alloca argument")
	)

static

Referenced by llvm::GCNTTIImpl::adjustInliningThreshold(), and llvm::GCNTTIImpl::getCallerAllocaCost().

◆ ArgAllocaCutoff

cl::opt< unsigned > ArgAllocaCutoff	(	"amdgpu-inline-arg-alloca-cutoff"	,
		cl::Hidden	,
		cl::init(256)	,
		cl::desc("Maximum alloca size to use for inline cost")
	)

static

Referenced by llvm::GCNTTIImpl::getCallerAllocaCost().

◆ InlineMaxBB

cl::opt< size_t > InlineMaxBB	(	"amdgpu-inline-max-bb"	,
		cl::Hidden	,
		cl::init(1100)	,
		cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)")
	)

static

Referenced by llvm::GCNTTIImpl::areInlineCompatible().

◆ MemcpyLoopUnroll

cl::opt< unsigned > MemcpyLoopUnroll	(	"amdgpu-memcpy-loop-unroll"	,
		cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop")	,
		cl::init(16)	,
		cl::Hidden
	)

static

Referenced by llvm::GCNTTIImpl::getMemcpyLoopLoweringType().

◆ UnrollMaxBlockToAnalyze

cl::opt< unsigned > UnrollMaxBlockToAnalyze	(	"amdgpu-unroll-max-block-to-analyze"	,
		cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU")	,
		cl::init(32)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().

◆ UnrollRuntimeLocal

cl::opt< bool > UnrollRuntimeLocal	(	"amdgpu-unroll-runtime-local"	,
		cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().

◆ UnrollThresholdIf

cl::opt< unsigned > UnrollThresholdIf	(	"amdgpu-unroll-threshold-if"	,
		cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop")	,
		cl::init(200)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().

◆ UnrollThresholdLocal

cl::opt< unsigned > UnrollThresholdLocal	(	"amdgpu-unroll-threshold-local"	,
		cl::desc("Unroll threshold for AMDGPU if local memory used in a loop")	,
		cl::init(1000)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().

◆ UnrollThresholdPrivate

cl::opt< unsigned > UnrollThresholdPrivate	(	"amdgpu-unroll-threshold-private"	,
		cl::desc("Unroll threshold for AMDGPU if private memory used in a loop")	,
		cl::init(2700)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTTIImpl::getUnrollingPreferences().

Macros

Functions

Variables

Macro Definition Documentation

◆ DEBUG_TYPE

Function Documentation

◆ adjustInliningThresholdUsingCallee()

◆ dependsOnLocalPhi()

◆ getCallArgsTotalAllocaSize()

◆ intrinsicHasPackedVectorBenefit()

Variable Documentation

◆ ArgAllocaCost

◆ ArgAllocaCutoff

◆ InlineMaxBB

◆ MemcpyLoopUnroll

◆ UnrollMaxBlockToAnalyze

◆ UnrollRuntimeLocal

◆ UnrollThresholdIf

◆ UnrollThresholdLocal

◆ UnrollThresholdPrivate