#include "Target/AMDGPU/AMDGPUSubtarget.h"

Inheritance diagram for llvm::AMDGPUSubtarget:

Public Types
enum	Generation { INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 , NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 , GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 , GFX13 = 12 }

Public Member Functions
	AMDGPUSubtarget (Triple TT)
std::pair< unsigned, unsigned >	getDefaultFlatWorkGroupSize (CallingConv::ID CC) const
std::pair< unsigned, unsigned >	getFlatWorkGroupSizes (const Function &F) const
std::optional< unsigned >	getReqdWorkGroupSize (const Function &F, unsigned Dim) const
bool	hasWavefrontsEvenlySplittingXDim (const Function &F, bool REquiresUniformYZ=false) const
std::pair< unsigned, unsigned >	getWavesPerEU (const Function &F) const
std::pair< unsigned, unsigned >	getWavesPerEU (std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes, const Function &F) const
	Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.
std::pair< unsigned, unsigned >	getEffectiveWavesPerEU (std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
	Returns the target minimum/maximum number of waves per EU.
unsigned	getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
	Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F) const
	Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is `F` and each workgroup running the function requires `LDSBytes` bytes of LDS space.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
	Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const
	Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is `MF`.
bool	isAmdHsaOS () const
bool	isAmdPalOS () const
bool	isMesa3DOS () const
bool	isMesaKernel (const Function &F) const
bool	isAmdHsaOrMesa (const Function &F) const
bool	isGCN () const
bool	useRealTrue16Insts () const
	Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool	hasMulI24 () const
bool	hasMulU24 () const
bool	hasSMulHi () const
bool	hasFminFmaxLegacy () const
unsigned	getWavefrontSize () const
unsigned	getWavefrontSizeLog2 () const
unsigned	getLocalMemorySize () const
	Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned	getAddressableLocalMemorySize () const
	Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned	getEUsPerCU () const
	Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
Align	getAlignmentForImplicitArgPtr () const
unsigned	getExplicitKernelArgOffset () const
	Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
virtual unsigned	getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0
virtual unsigned	getMinFlatWorkGroupSize () const =0
virtual unsigned	getMaxFlatWorkGroupSize () const =0
virtual unsigned	getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0
virtual unsigned	getMinWavesPerEU () const =0
unsigned	getMaxWavesPerEU () const
unsigned	getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
	Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
SmallVector< unsigned >	getMaxNumWorkGroups (const Function &F) const
	Return the number of work groups for the function.
bool	isSingleLaneExecution (const Function &Kernel) const
	Return true if only a single workitem can be active in a wave.
bool	makeLIDRangeMetadata (Instruction *I) const
	Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned	getImplicitArgNumBytes (const Function &F) const
uint64_t	getExplicitKernArgSize (const Function &F, Align &MaxAlign) const
unsigned	getKernArgSegmentSize (const Function &F, Align &MaxAlign) const
AMDGPUDwarfFlavour	getAMDGPUDwarfFlavour () const
virtual	~AMDGPUSubtarget ()=default

Static Public Member Functions
static const AMDGPUSubtarget &	get (const MachineFunction &MF)
static const AMDGPUSubtarget &	get (const TargetMachine &TM, const Function &F)

Protected Attributes
bool	HasMulI24 = true
bool	HasMulU24 = true
bool	HasSMulHi = false
bool	HasFminFmaxLegacy = true
unsigned	EUsPerCU = 4
unsigned	MaxWavesPerEU = 10
unsigned	LocalMemorySize = 0
unsigned	AddressableLocalMemorySize = 0
char	WavefrontSizeLog2 = 0

Detailed Description

Definition at line 30 of file AMDGPUSubtarget.h.

Member Enumeration Documentation

◆ Generation

enum llvm::AMDGPUSubtarget::Generation

Enumerator
INVALID
R600
R700
EVERGREEN
NORTHERN_ISLANDS
SOUTHERN_ISLANDS
SEA_ISLANDS
VOLCANIC_ISLANDS
GFX9
GFX10
GFX11
GFX12
GFX13

Definition at line 32 of file AMDGPUSubtarget.h.

Constructor & Destructor Documentation

◆ AMDGPUSubtarget()

llvm::AMDGPUSubtarget::AMDGPUSubtarget ( Triple TT )

inline

Definition at line 64 of file AMDGPUSubtarget.h.

References llvm::move().

Referenced by llvm::GCNSubtarget::GCNSubtarget(), get(), get(), and llvm::R600Subtarget::R600Subtarget().

◆ ~AMDGPUSubtarget()

virtual llvm::AMDGPUSubtarget::~AMDGPUSubtarget ( )

virtual default

Member Function Documentation

◆ get() [1/2]

const AMDGPUSubtarget & AMDGPUSubtarget::get ( const MachineFunction & MF )

static

Definition at line 413 of file AMDGPUSubtarget.cpp.

References AMDGPUSubtarget(), llvm::MachineFunction::getSubtarget(), llvm::MachineFunction::getTarget(), llvm::TargetMachine::getTargetTriple(), and llvm::Triple::isAMDGCN().

◆ get() [2/2]

const AMDGPUSubtarget & AMDGPUSubtarget::get	(	const TargetMachine &	TM,
		const Function &	F )

static

Definition at line 419 of file AMDGPUSubtarget.cpp.

References AMDGPUSubtarget(), F, llvm::TargetMachine::getSubtarget(), llvm::TargetMachine::getTargetTriple(), and llvm::Triple::isAMDGCN().

◆ getAddressableLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getAddressableLocalMemorySize ( ) const

inline

Return the maximum number of bytes of LDS that can be allocated to a single workgroup.

For GFX10-GFX12 in WGP mode this is limited to 64k even though the WGP has 128k in total.

Definition at line 234 of file AMDGPUSubtarget.h.

References AddressableLocalMemorySize.

◆ getAlignmentForImplicitArgPtr()

Align llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr ( ) const

inline

Definition at line 243 of file AMDGPUSubtarget.h.

References isAmdHsaOS().

Referenced by getKernArgSegmentSize().

◆ getAMDGPUDwarfFlavour()

AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour ( ) const

Returns: Corresponding DWARF register number mapping flavour for the WavefrontSize.

Definition at line 408 of file AMDGPUSubtarget.cpp.

References getWavefrontSize(), llvm::Wave32, and llvm::Wave64.

◆ getDefaultFlatWorkGroupSize()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getDefaultFlatWorkGroupSize ( CallingConv::ID CC ) const

Returns: Default flat work group size range for a calling convention.

Definition at line 139 of file AMDGPUSubtarget.cpp.

References llvm::CallingConv::AMDGPU_ES, llvm::CallingConv::AMDGPU_GS, llvm::CallingConv::AMDGPU_HS, llvm::CallingConv::AMDGPU_LS, llvm::CallingConv::AMDGPU_PS, llvm::CallingConv::AMDGPU_VS, getMaxFlatWorkGroupSize(), and getWavefrontSize().

Referenced by getFlatWorkGroupSizes().

◆ getEffectiveWavesPerEU()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getEffectiveWavesPerEU	(	std::pair< unsigned, unsigned >	RequestedWavesPerEU,
		std::pair< unsigned, unsigned >	FlatWorkGroupSizes,
		unsigned	LDSBytes ) const

Returns the target minimum/maximum number of waves per EU.

This is based on the minimum/maximum number of RequestedWavesPerEU and further limited by the maximum achievable occupancy derived from the range of FlatWorkGroupSizes and number of LDSBytes per workgroup.

Definition at line 176 of file AMDGPUSubtarget.cpp.

References llvm::Default, getMaxWavesPerEU(), getOccupancyWithWorkGroupSizes(), and getWavesPerEUForWorkGroup().

Referenced by getWavesPerEU().

◆ getEUsPerCU()

unsigned llvm::AMDGPUSubtarget::getEUsPerCU ( ) const

inline

Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.

This takes WGP mode vs. CU mode into account.

Definition at line 241 of file AMDGPUSubtarget.h.

References EUsPerCU.

Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithWorkGroupSizes().

◆ getExplicitKernArgSize()

uint64_t AMDGPUSubtarget::getExplicitKernArgSize	(	const Function &	F,
		Align &	MaxAlign ) const

Definition at line 361 of file AMDGPUSubtarget.cpp.

References llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, assert(), DL, F, and llvm::CallingConv::SPIR_KERNEL.

Referenced by getKernArgSegmentSize().

◆ getExplicitKernelArgOffset()

unsigned llvm::AMDGPUSubtarget::getExplicitKernelArgOffset ( ) const

inline

Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

Definition at line 249 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDHSA, llvm::Triple::AMDPAL, llvm_unreachable, llvm::Triple::Mesa3D, and llvm::Triple::UnknownOS.

Referenced by getKernArgSegmentSize(), and llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel().

◆ getFlatWorkGroupSizes()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getFlatWorkGroupSizes ( const Function & F ) const

Returns: Subtarget's default pair of minimum/maximum flat work group sizes for function F, or the minimum/maximum flat work group sizes explicitly requested using the "amdgpu-flat-work-group-size" attribute attached to function F. Falls back to the subtarget's default values if the explicitly requested values cannot be converted to integers or violate the subtarget's specifications.

Definition at line 153 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerPairAttribute(), getMaxFlatWorkGroupSize(), and getMinFlatWorkGroupSize().

Referenced by getMaxLocalMemSizeWithWaveCount(), getMaxWorkitemID(), getOccupancyWithWorkGroupSizes(), getWavesPerEU(), and makeLIDRangeMetadata().

◆ getImplicitArgNumBytes()

unsigned AMDGPUSubtarget::getImplicitArgNumBytes ( const Function & F ) const

Returns: Number of bytes of arguments that are passed to a shader or kernel in addition to the explicit ones declared for the function.

Definition at line 342 of file AMDGPUSubtarget.cpp.

References llvm::AMDGPU::AMDHSA_COV5, assert(), F, llvm::AMDGPU::getAMDHSACodeObjectVersion(), llvm::AMDGPU::isKernel(), and isMesaKernel().

Referenced by getKernArgSegmentSize().

◆ getKernArgSegmentSize()

unsigned AMDGPUSubtarget::getKernArgSegmentSize	(	const Function &	F,
		Align &	MaxAlign ) const

Definition at line 386 of file AMDGPUSubtarget.cpp.

References llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, F, getAlignmentForImplicitArgPtr(), getExplicitKernArgSize(), getExplicitKernelArgOffset(), getImplicitArgNumBytes(), and llvm::CallingConv::SPIR_KERNEL.

Referenced by llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::getHSAKernelProps().

◆ getLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getLocalMemorySize ( ) const

inline

Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.

For GFX10-GFX12 in WGP mode this is 128k even though each workgroup is limited to 64k.

Definition at line 226 of file AMDGPUSubtarget.h.

References LocalMemorySize.

Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithWorkGroupSizes().

◆ getMaxFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize ( ) const

pure virtual

Returns: Maximum flat work group size supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getDefaultFlatWorkGroupSize(), and getFlatWorkGroupSizes().

◆ getMaxLocalMemSizeWithWaveCount()

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount	(	unsigned	WaveCount,
		const Function &	F ) const

Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

Definition at line 39 of file AMDGPUSubtarget.cpp.

References F, getEUsPerCU(), getFlatWorkGroupSizes(), getLocalMemorySize(), getMaxLocalMemSizeWithWaveCount(), and getWavefrontSize().

Referenced by getMaxLocalMemSizeWithWaveCount().

◆ getMaxNumWorkGroups()

SmallVector< unsigned > AMDGPUSubtarget::getMaxNumWorkGroups ( const Function & F ) const

Return the number of work groups for the function.

Definition at line 428 of file AMDGPUSubtarget.cpp.

References F, and llvm::AMDGPU::getIntegerVecAttribute().

◆ getMaxWavesPerEU()

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const

inline

Returns: Maximum number of waves per execution unit supported by the subtarget without any kind of limitation.

Definition at line 286 of file AMDGPUSubtarget.h.

References MaxWavesPerEU.

Referenced by getEffectiveWavesPerEU(), getOccupancyWithWorkGroupSizes(), and getWavesPerEU().

◆ getMaxWorkGroupsPerCU()

virtual unsigned llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize ) const

pure virtual

Returns: Maximum number of work groups per compute unit supported by the subtarget and limited by given FlatWorkGroupSize.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getOccupancyWithWorkGroupSizes().

◆ getMaxWorkitemID()

unsigned AMDGPUSubtarget::getMaxWorkitemID	(	const Function &	Kernel,
		unsigned	Dimension ) const

Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.

Definition at line 257 of file AMDGPUSubtarget.cpp.

References getFlatWorkGroupSizes(), and getReqdWorkGroupSize().

Referenced by isSingleLaneExecution().

◆ getMinFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize ( ) const

pure virtual

Returns: Minimum flat work group size supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getFlatWorkGroupSizes().

◆ getMinWavesPerEU()

virtual unsigned llvm::AMDGPUSubtarget::getMinWavesPerEU ( ) const

pure virtual

Returns: Minimum number of waves per execution unit supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

◆ getOccupancyWithWorkGroupSizes() [1/3]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getOccupancyWithWorkGroupSizes ( const MachineFunction & MF ) const

Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.

This notably depends on the range of allowed flat group sizes for the function, the amount of per-workgroup LDS space required by the function, and hardware characteristics.

Definition at line 132 of file AMDGPUSubtarget.cpp.

References llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), and getOccupancyWithWorkGroupSizes().

◆ getOccupancyWithWorkGroupSizes() [2/3]

std::pair< unsigned, unsigned > llvm::AMDGPUSubtarget::getOccupancyWithWorkGroupSizes	(	uint32_t	LDSBytes,
		const Function &	F ) const

inline

Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.

This notably depends on the range of allowed flat group sizes for the function and hardware characteristics.

Definition at line 141 of file AMDGPUSubtarget.h.

References F, getFlatWorkGroupSizes(), and getOccupancyWithWorkGroupSizes().

Referenced by llvm::GCNSubtarget::computeOccupancy(), getEffectiveWavesPerEU(), getOccupancyWithWorkGroupSizes(), and getOccupancyWithWorkGroupSizes().

◆ getOccupancyWithWorkGroupSizes() [3/3]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getOccupancyWithWorkGroupSizes	(	uint32_t	LDSBytes,
		std::pair< unsigned, unsigned >	FlatWorkGroupSizes ) const

Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes.

Definition at line 52 of file AMDGPUSubtarget.cpp.

References llvm::divideCeil(), getEUsPerCU(), getLocalMemorySize(), getMaxWavesPerEU(), getMaxWorkGroupsPerCU(), getWavefrontSize(), and std::swap().

◆ getReqdWorkGroupSize()

std::optional< unsigned > AMDGPUSubtarget::getReqdWorkGroupSize	(	const Function &	F,
		unsigned	Dim ) const

Returns: The required size of workgroups that will be used to execute F in the Dim dimension, if it is known (from !reqd_work_group_size metadata). Otherwise, returns std::nullopt.

Definition at line 227 of file AMDGPUSubtarget.cpp.

References llvm::mdconst::extract(), and llvm::GlobalObject::getMetadata().

Referenced by getMaxWorkitemID(), and makeLIDRangeMetadata().

◆ getWavefrontSize()

unsigned llvm::AMDGPUSubtarget::getWavefrontSize ( ) const

inline

Definition at line 214 of file AMDGPUSubtarget.h.

References WavefrontSizeLog2.

Referenced by getAMDGPUDwarfFlavour(), getDefaultFlatWorkGroupSize(), llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::getHSAKernelProps(), getMaxLocalMemSizeWithWaveCount(), getOccupancyWithWorkGroupSizes(), hasWavefrontsEvenlySplittingXDim(), llvm::GCNSubtarget::isWave32(), llvm::GCNSubtarget::isWave64(), lowerFCMPIntrinsic(), and lowerICMPIntrinsic().

◆ getWavefrontSizeLog2()

unsigned llvm::AMDGPUSubtarget::getWavefrontSizeLog2 ( ) const

inline

Definition at line 218 of file AMDGPUSubtarget.h.

References WavefrontSizeLog2.

Referenced by llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex(), and llvm::GCNSubtarget::initializeSubtargetDependencies().

◆ getWavesPerEU() [1/2]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU ( const Function & F ) const

Returns: Subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or the minimum/maximum number of waves per execution unit explicitly requested using the "amdgpu-waves-per-eu" attribute attached to function F. Falls back to the subtarget's default values if the explicitly requested values cannot be converted to integers, violate the subtarget's specifications, or are not compatible with the minimum/maximum number of waves limited by flat work group size, register usage, and/or LDS usage.

Definition at line 203 of file AMDGPUSubtarget.cpp.

References F, getFlatWorkGroupSizes(), llvm::AMDGPU::getIntegerPairAttribute(), and getWavesPerEU().

Referenced by llvm::GCNSubtarget::getMaxNumSGPRs(), llvm::GCNSubtarget::getMaxNumVGPRs(), and getWavesPerEU().

◆ getWavesPerEU() [2/2]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU	(	std::pair< unsigned, unsigned >	FlatWorkGroupSizes,
		unsigned	LDSBytes,
		const Function &	F ) const

Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes and LDSBytes to the per-workgroup LDS allocation.

Definition at line 215 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getEffectiveWavesPerEU(), llvm::AMDGPU::getIntegerPairAttribute(), and getMaxWavesPerEU().

◆ getWavesPerEUForWorkGroup()

virtual unsigned llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize ) const

pure virtual

Returns: Number of waves per execution unit required to support the given FlatWorkGroupSize.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getEffectiveWavesPerEU().

◆ hasFminFmaxLegacy()

bool llvm::AMDGPUSubtarget::hasFminFmaxLegacy ( ) const

inline

Definition at line 210 of file AMDGPUSubtarget.h.

References HasFminFmaxLegacy.

◆ hasMulI24()

bool llvm::AMDGPUSubtarget::hasMulI24 ( ) const

inline

Definition at line 198 of file AMDGPUSubtarget.h.

References HasMulI24.

◆ hasMulU24()

bool llvm::AMDGPUSubtarget::hasMulU24 ( ) const

inline

Definition at line 202 of file AMDGPUSubtarget.h.

References HasMulU24.

◆ hasSMulHi()

bool llvm::AMDGPUSubtarget::hasSMulHi ( ) const

inline

Definition at line 206 of file AMDGPUSubtarget.h.

References HasSMulHi.

◆ hasWavefrontsEvenlySplittingXDim()

bool AMDGPUSubtarget::hasWavefrontsEvenlySplittingXDim	(	const Function &	F,
		bool	REquiresUniformYZ = false ) const

Returns: true if F will execute in a manner that leaves the X dimension of the workitem ID evenly tiling wavefronts - that is, if X / wavefrontsize is uniform. This is true if either the Y and Z block dimensions are known to always be 1 or if the X dimension will always be a power of 2. If RequiresUniformYZ is true, it also ensures that the Y and Z workitem IDs will be uniform (so, while a (32, 2, 1) launch with wavesize64 would ordinarily pass this test, it won't with RequiresUniformYZ).

This information is currently only gathered from the !reqd_work_group_size metadata on F, but this may be improved in the future.

Definition at line 235 of file AMDGPUSubtarget.cpp.

References llvm::mdconst::extract(), F, getWavefrontSize(), and llvm::isPowerOf2_32().

◆ isAmdHsaOrMesa()

bool llvm::AMDGPUSubtarget::isAmdHsaOrMesa ( const Function & F ) const

inline

Definition at line 174 of file AMDGPUSubtarget.h.

References F, isAmdHsaOS(), and isMesaKernel().

◆ isAmdHsaOS()

bool llvm::AMDGPUSubtarget::isAmdHsaOS ( ) const

inline

Definition at line 160 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDHSA.

Referenced by getAlignmentForImplicitArgPtr(), llvm::GCNSubtarget::getTrapHandlerAbi(), llvm::GCNSubtarget::initializeSubtargetDependencies(), isAmdHsaOrMesa(), and llvm::AMDGPUAsmPrinter::runOnMachineFunction().

◆ isAmdPalOS()

bool llvm::AMDGPUSubtarget::isAmdPalOS ( ) const

inline

Definition at line 164 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDPAL.

Referenced by llvm::AMDGPUAsmPrinter::runOnMachineFunction().

◆ isGCN()

bool llvm::AMDGPUSubtarget::isGCN ( ) const

inline

Definition at line 178 of file AMDGPUSubtarget.h.

◆ isMesa3DOS()

bool llvm::AMDGPUSubtarget::isMesa3DOS ( ) const

inline

Definition at line 168 of file AMDGPUSubtarget.h.

References llvm::Triple::Mesa3D.

Referenced by llvm::GCNSubtarget::isMesaGfxShader(), and isMesaKernel().

◆ isMesaKernel()

bool AMDGPUSubtarget::isMesaKernel ( const Function & F ) const

Definition at line 253 of file AMDGPUSubtarget.cpp.

References F, isMesa3DOS(), and llvm::AMDGPU::isShader().

Referenced by getImplicitArgNumBytes(), and isAmdHsaOrMesa().

◆ isSingleLaneExecution()

bool AMDGPUSubtarget::isSingleLaneExecution ( const Function & Kernel ) const

Return true if only a single workitem can be active in a wave.

Definition at line 265 of file AMDGPUSubtarget.cpp.

References getMaxWorkitemID(), and I.

◆ makeLIDRangeMetadata()

bool AMDGPUSubtarget::makeLIDRangeMetadata ( Instruction * I ) const

Creates value range metadata on a workitemid.* intrinsic call or load.

Definition at line 274 of file AMDGPUSubtarget.cpp.

References llvm::MDBuilder::createRange(), llvm::dyn_cast(), F, getFlatWorkGroupSizes(), getReqdWorkGroupSize(), I, llvm::Lower, Range, and llvm::Upper.

◆ useRealTrue16Insts()

bool llvm::AMDGPUSubtarget::useRealTrue16Insts ( ) const

inline

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.

Definition at line 194 of file AMDGPUSubtarget.h.

Member Data Documentation

◆ AddressableLocalMemorySize

unsigned llvm::AMDGPUSubtarget::AddressableLocalMemorySize = 0

protected

Definition at line 60 of file AMDGPUSubtarget.h.

Referenced by getAddressableLocalMemorySize(), llvm::GCNSubtarget::initializeSubtargetDependencies(), and llvm::R600Subtarget::R600Subtarget().

◆ EUsPerCU

unsigned llvm::AMDGPUSubtarget::EUsPerCU = 4

protected

Definition at line 57 of file AMDGPUSubtarget.h.

Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getEUsPerCU().

◆ HasFminFmaxLegacy

bool llvm::AMDGPUSubtarget::HasFminFmaxLegacy = true

protected

Definition at line 55 of file AMDGPUSubtarget.h.

Referenced by hasFminFmaxLegacy(), and llvm::GCNSubtarget::initializeSubtargetDependencies().

◆ HasMulI24

bool llvm::AMDGPUSubtarget::HasMulI24 = true

protected

Definition at line 52 of file AMDGPUSubtarget.h.

Referenced by hasMulI24(), and llvm::R600Subtarget::initializeSubtargetDependencies().

◆ HasMulU24

bool llvm::AMDGPUSubtarget::HasMulU24 = true

protected

Definition at line 53 of file AMDGPUSubtarget.h.

Referenced by hasMulU24(), and llvm::R600Subtarget::initializeSubtargetDependencies().

◆ HasSMulHi

bool llvm::AMDGPUSubtarget::HasSMulHi = false

protected

Definition at line 54 of file AMDGPUSubtarget.h.

Referenced by hasSMulHi(), and llvm::GCNSubtarget::initializeSubtargetDependencies().

◆ LocalMemorySize

unsigned llvm::AMDGPUSubtarget::LocalMemorySize = 0

protected

Definition at line 59 of file AMDGPUSubtarget.h.

Referenced by getLocalMemorySize(), llvm::GCNSubtarget::initializeSubtargetDependencies(), and llvm::R600Subtarget::R600Subtarget().

◆ MaxWavesPerEU

unsigned llvm::AMDGPUSubtarget::MaxWavesPerEU = 10

protected

Definition at line 58 of file AMDGPUSubtarget.h.

Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getMaxWavesPerEU().

◆ WavefrontSizeLog2

char llvm::AMDGPUSubtarget::WavefrontSizeLog2 = 0

protected

Definition at line 61 of file AMDGPUSubtarget.h.

Referenced by getWavefrontSize(), getWavefrontSizeLog2(), and llvm::GCNSubtarget::initializeSubtargetDependencies().

The documentation for this class was generated from the following files:

lib/Target/AMDGPU/AMDGPUSubtarget.h
lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Public Types

Public Member Functions

Static Public Member Functions

Protected Attributes

Detailed Description

Member Enumeration Documentation

◆ Generation

Constructor & Destructor Documentation

◆ AMDGPUSubtarget()

◆ ~AMDGPUSubtarget()

Member Function Documentation

◆ get() [1/2]

◆ get() [2/2]

◆ getAddressableLocalMemorySize()

◆ getAlignmentForImplicitArgPtr()

◆ getAMDGPUDwarfFlavour()

◆ getDefaultFlatWorkGroupSize()

◆ getEffectiveWavesPerEU()

◆ getEUsPerCU()

◆ getExplicitKernArgSize()

◆ getExplicitKernelArgOffset()

◆ getFlatWorkGroupSizes()

◆ getImplicitArgNumBytes()

◆ getKernArgSegmentSize()

◆ getLocalMemorySize()

◆ getMaxFlatWorkGroupSize()

◆ getMaxLocalMemSizeWithWaveCount()

◆ getMaxNumWorkGroups()

◆ getMaxWavesPerEU()

◆ getMaxWorkGroupsPerCU()

◆ getMaxWorkitemID()

◆ getMinFlatWorkGroupSize()

◆ getMinWavesPerEU()

◆ getOccupancyWithWorkGroupSizes() [1/3]

◆ getOccupancyWithWorkGroupSizes() [2/3]

◆ getOccupancyWithWorkGroupSizes() [3/3]

◆ getReqdWorkGroupSize()

◆ getWavefrontSize()

◆ getWavefrontSizeLog2()

◆ getWavesPerEU() [1/2]

◆ getWavesPerEU() [2/2]

◆ getWavesPerEUForWorkGroup()

◆ hasFminFmaxLegacy()

◆ hasMulI24()

◆ hasMulU24()

◆ hasSMulHi()

◆ hasWavefrontsEvenlySplittingXDim()

◆ isAmdHsaOrMesa()

◆ isAmdHsaOS()

◆ isAmdPalOS()

◆ isGCN()

◆ isMesa3DOS()

◆ isMesaKernel()

◆ isSingleLaneExecution()

◆ makeLIDRangeMetadata()

◆ useRealTrue16Insts()

Member Data Documentation

◆ AddressableLocalMemorySize

◆ EUsPerCU

◆ HasFminFmaxLegacy

◆ HasMulI24

◆ HasMulU24

◆ HasSMulHi

◆ LocalMemorySize

◆ MaxWavesPerEU

◆ WavefrontSizeLog2