LLVM 20.0.0git
|
#include <cstdint>
Go to the source code of this file.
Classes | |
struct | hsa_dim3_s |
struct | hsa_ext_control_directives_s |
The hsa_ext_control_directives_t specifies the values for the HSAIL control directives. More... | |
struct | amd_kernel_code_t |
AMD Kernel Code Object (amd_kernel_code_t). More... | |
Macros | |
#define | AMD_HSA_BITS_SET(dst, mask, val) |
#define | AMD_HSA_BITS_GET(src, mask) ((src & mask) >> mask ## _SHIFT) \ |
Typedefs | |
typedef uint8_t | hsa_powertwo8_t |
typedef uint32_t | hsa_ext_code_kind_t |
typedef uint8_t | hsa_ext_brig_profile8_t |
typedef uint8_t | hsa_ext_brig_machine_model8_t |
typedef uint64_t | hsa_ext_control_directive_present64_t |
typedef uint16_t | hsa_ext_exception_kind16_t |
typedef uint32_t | hsa_ext_code_kind32_t |
typedef struct hsa_dim3_s | hsa_dim3_t |
typedef uint32_t | amd_code_version32_t |
The version of the amd_*_code_t struct. | |
typedef uint64_t | amd_compute_pgm_resource_register64_t |
Shader program settings for CS. | |
typedef uint32_t | amd_code_property32_t |
Every amd_*_code_t has the following properties, which are composed of a number of bit fields. | |
typedef struct hsa_ext_control_directives_s | hsa_ext_control_directives_t |
The hsa_ext_control_directives_t specifies the values for the HSAIL control directives. | |
#define AMD_HSA_BITS_GET | ( | src, | |
mask | |||
) | ((src & mask) >> mask ## _SHIFT) \ |
Definition at line 48 of file AMDKernelCodeT.h.
#define AMD_HSA_BITS_SET | ( | dst, | |
mask, | |||
val | |||
) |
Definition at line 43 of file AMDKernelCodeT.h.
typedef uint32_t amd_code_property32_t |
Every amd_*_code_t has the following properties, which are composed of a number of bit fields.
Every bit field has a mask (AMD_CODE_PROPERTY_*), bit width (AMD_CODE_PROPERTY_*_WIDTH), and bit shift amount (AMD_CODE_PROPERTY_*_SHIFT) for convenient access. Unused bits must be 0.
(Note that bit fields cannot be used, as their layout is implementation-defined in the C standard and so cannot be used to specify an ABI.)
Definition at line 72 of file AMDKernelCodeT.h.
typedef uint32_t amd_code_version32_t |
The version of the amd_*_code_t struct.
Minor versions must be backward compatible.
Definition at line 36 of file AMDKernelCodeT.h.
Shader program settings for CS.
Contains COMPUTE_PGM_RSRC1 and COMPUTE_PGM_RSRC2 registers.
Definition at line 62 of file AMDKernelCodeT.h.
typedef struct hsa_dim3_s hsa_dim3_t |
typedef uint8_t hsa_ext_brig_machine_model8_t |
Definition at line 23 of file AMDKernelCodeT.h.
typedef uint8_t hsa_ext_brig_profile8_t |
Definition at line 22 of file AMDKernelCodeT.h.
typedef uint32_t hsa_ext_code_kind32_t |
Definition at line 26 of file AMDKernelCodeT.h.
typedef uint32_t hsa_ext_code_kind_t |
Definition at line 21 of file AMDKernelCodeT.h.
Definition at line 24 of file AMDKernelCodeT.h.
typedef struct hsa_ext_control_directives_s hsa_ext_control_directives_t |
The hsa_ext_control_directives_t specifies the values for the HSAIL control directives.
These control how the finalizer generates code. This struct is used both as an argument to hsaFinalizeKernel to specify values for the control directives, and is used in HsaKernelCode to record the values of the control directives that the finalizer used when generating the code, which came either from the finalizer argument or from explicit HSAIL control directives. See the definition of the control directives in the HSA Programmer's Reference Manual, which also defines how the values specified as finalizer arguments have to agree with the control directives in the HSAIL code.
typedef uint16_t hsa_ext_exception_kind16_t |
Definition at line 25 of file AMDKernelCodeT.h.
typedef uint8_t hsa_powertwo8_t |
Definition at line 20 of file AMDKernelCodeT.h.
Enumerator | |
---|---|
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT | Enable the setup of the SGPR user data registers (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t for initial register state. The total number of SGPR user data registers requested must not exceed 16. Any requests beyond 16 will be ignored. Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of SGPR user data registers enabled up to 16). |
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER | |
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR | |
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR | |
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR | |
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID | |
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z | |
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT | |
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 | |
AMD_CODE_PROPERTY_RESERVED1_SHIFT | |
AMD_CODE_PROPERTY_RESERVED1_WIDTH | |
AMD_CODE_PROPERTY_RESERVED1 | |
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT | Control wave ID base counter for GDS ordered-append. Used to set COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if ORDERED_APPEND_MODE also needs to be settable) |
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH | |
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS | |
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT | The interleave (swizzle) element size in bytes required by the code for private memory. This must be 2, 4, 8 or 16. This value is provided to the finalizer when it is invoked and is recorded here. The hardware will interleave the memory requests of each lane of a wavefront by this element size to ensure each work-item gets a distinct memory location. Therefore, the finalizer ensures that all load and store operations done to private memory do not exceed this size. For example, if the element size is 4 (32-bits or dword) and a 64-bit value must be loaded, the finalizer will generate two 32-bit loads. This ensures that the interleaving will get the work-item specific dword for both halves of the 64-bit value. If it just did a 64-bit load then it would get one dword which belonged to its own work-item, but the second dword would belong to the adjacent lane work-item since the interleaving is in dwords. The value used must match the value that the runtime configures the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This is generally DWORD. Use values from the amd_element_byte_size_t enum. |
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH | |
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE | |
AMD_CODE_PROPERTY_IS_PTR64_SHIFT | Whether global memory addresses are 64 bits. Must match amd_kernel_code_t.hsail_machine_model == HSA_MACHINE_LARGE. Must also match SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)), SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+). |
AMD_CODE_PROPERTY_IS_PTR64_WIDTH | |
AMD_CODE_PROPERTY_IS_PTR64 | |
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT | Indicate if the generated ISA is using a dynamically sized call stack. This can happen if calls are implemented using a call stack and recursion, alloca or calls to indirect functions are present. In these cases the Finalizer cannot compute the total private segment size at compile time. In this case the workitem_private_segment_byte_size only specifies the statically known private segment size, and additional space must be added for the call stack. |
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH | |
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK | |
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT | Indicate if code generated has support for debugging. |
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH | |
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED | |
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT | |
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH | |
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED | |
AMD_CODE_PROPERTY_RESERVED2_SHIFT | |
AMD_CODE_PROPERTY_RESERVED2_WIDTH | |
AMD_CODE_PROPERTY_RESERVED2 |
Definition at line 73 of file AMDKernelCodeT.h.
enum amd_code_version_t |
Enumerator | |
---|---|
AMD_CODE_VERSION_MAJOR | |
AMD_CODE_VERSION_MINOR |
Definition at line 37 of file AMDKernelCodeT.h.
The values used to define the number of bytes to use for the swizzle element size.
Enumerator | |
---|---|
AMD_ELEMENT_2_BYTES | |
AMD_ELEMENT_4_BYTES | |
AMD_ELEMENT_8_BYTES | |
AMD_ELEMENT_16_BYTES |
Definition at line 53 of file AMDKernelCodeT.h.