40 OS <<
"\t.hsa_code_object_version " <<
50 OS <<
"\t.hsa_code_object_isa " <<
52 ",\"" << VendorName <<
"\",\"" << ArchName <<
"\"\n";
92 OS <<
"\t.amd_kernel_code_t\n" <<
93 "\t\tkernel_code_version_major = " <<
95 "\t\tkernel_code_version_minor = " <<
97 "\t\tmachine_kind = " <<
99 "\t\tmachine_version_major = " <<
101 "\t\tmachine_version_minor = " <<
103 "\t\tmachine_version_stepping = " <<
105 "\t\tkernel_code_entry_byte_offset = " <<
107 "\t\tkernel_code_prefetch_byte_size = " <<
109 "\t\tmax_scratch_backing_memory_byte_size = " <<
111 "\t\tcompute_pgm_rsrc1_vgprs = " <<
113 "\t\tcompute_pgm_rsrc1_sgprs = " <<
115 "\t\tcompute_pgm_rsrc1_priority = " <<
117 "\t\tcompute_pgm_rsrc1_float_mode = " <<
119 "\t\tcompute_pgm_rsrc1_priv = " <<
121 "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
123 "\t\tcompute_pgm_rsrc1_debug_mode = " <<
125 "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
127 "\t\tcompute_pgm_rsrc2_scratch_en = " <<
129 "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
131 "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
133 "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
135 "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
137 "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
139 "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
141 "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
143 "\t\tcompute_pgm_rsrc2_lds_size = " <<
145 "\t\tcompute_pgm_rsrc2_excp_en = " <<
148 "\t\tenable_sgpr_private_segment_buffer = " <<
149 EnableSGPRPrivateSegmentBuffer <<
'\n' <<
150 "\t\tenable_sgpr_dispatch_ptr = " <<
151 EnableSGPRDispatchPtr <<
'\n' <<
152 "\t\tenable_sgpr_queue_ptr = " <<
153 EnableSGPRQueuePtr <<
'\n' <<
154 "\t\tenable_sgpr_kernarg_segment_ptr = " <<
155 EnableSGPRKernargSegmentPtr <<
'\n' <<
156 "\t\tenable_sgpr_dispatch_id = " <<
157 EnableSGPRDispatchID <<
'\n' <<
158 "\t\tenable_sgpr_flat_scratch_init = " <<
159 EnableSGPRFlatScratchInit <<
'\n' <<
160 "\t\tenable_sgpr_private_segment_size = " <<
161 EnableSGPRPrivateSegmentSize <<
'\n' <<
162 "\t\tenable_sgpr_grid_workgroup_count_x = " <<
163 EnableSGPRGridWorkgroupCountX <<
'\n' <<
164 "\t\tenable_sgpr_grid_workgroup_count_y = " <<
165 EnableSGPRGridWorkgroupCountY <<
'\n' <<
166 "\t\tenable_sgpr_grid_workgroup_count_z = " <<
167 EnableSGPRGridWorkgroupCountZ <<
'\n' <<
168 "\t\tenable_ordered_append_gds = " <<
169 EnableOrderedAppendGDS <<
'\n' <<
170 "\t\tprivate_element_size = " <<
171 PrivateElementSize <<
'\n' <<
174 "\t\tis_dynamic_callstack = " <<
175 IsDynamicCallstack <<
'\n' <<
176 "\t\tis_debug_enabled = " <<
177 IsDebugEnabled <<
'\n' <<
178 "\t\tis_xnack_enabled = " <<
179 IsXNackEnabled <<
'\n' <<
180 "\t\tworkitem_private_segment_byte_size = " <<
182 "\t\tworkgroup_group_segment_byte_size = " <<
184 "\t\tgds_segment_byte_size = " <<
186 "\t\tkernarg_segment_byte_size = " <<
188 "\t\tworkgroup_fbarrier_count = " <<
190 "\t\twavefront_sgpr_count = " <<
192 "\t\tworkitem_vgpr_count = " <<
194 "\t\treserved_vgpr_first = " <<
196 "\t\treserved_vgpr_count = " <<
198 "\t\treserved_sgpr_first = " <<
200 "\t\treserved_sgpr_count = " <<
202 "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
204 "\t\tdebug_private_segment_buffer_sgpr = " <<
206 "\t\tkernarg_segment_alignment = " <<
208 "\t\tgroup_segment_alignment = " <<
210 "\t\tprivate_segment_alignment = " <<
212 "\t\twavefront_size = " <<
214 "\t\tcall_convention = " <<
216 "\t\truntime_loader_kernel_symbol = " <<
219 "\t.end_amd_kernel_code_t\n";
264 uint16_t VendorNameSize = VendorName.
size() + 1;
265 uint16_t ArchNameSize = ArchName.
size() + 1;
266 unsigned DescSZ =
sizeof(VendorNameSize) +
sizeof(ArchNameSize) +
267 sizeof(Major) +
sizeof(Minor) +
sizeof(Stepping) +
268 VendorNameSize + ArchNameSize;
#define G_00B848_FLOAT_MODE(x)
#define G_00B848_DEBUG_MODE(x)
#define G_00B84C_EXCP_EN_MSB(x)
size_t size() const
size - Get the string size.
#define G_00B84C_USER_SGPR(x)
MCSectionELF * getELFSection(StringRef Section, unsigned Type, unsigned Flags)
#define G_00B84C_TGID_Z_EN(x)
virtual void EmitBytes(StringRef Data)
Emit the bytes in Data into the output.
Target specific streamer interface.
#define G_00B84C_TG_SIZE_EN(x)
void PushSection()
Save the current and previous section on the section stack.
#define G_00B84C_TGID_Y_EN(x)
#define G_00B84C_SCRATCH_EN(x)
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName) override
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName) override
uint32_t amd_kernel_code_version_major
uint32_t code_properties
Code properties.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
AMD Kernel Code Object (amd_kernel_code_t).
uint16_t amd_machine_version_major
uint16_t reserved_sgpr_first
If reserved_sgpr_count is 0 then must be 0.
MCContext & getContext() const
#define G_00B848_SGPRS(x)
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
#define G_00B848_VGPRS(x)
uint8_t group_segment_alignment
uint16_t amd_machine_version_minor
uint32_t amd_kernel_code_version_minor
uint64_t runtime_loader_kernel_symbol
virtual void EmitIntValue(uint64_t Value, unsigned Size)
Special case of EmitValue that avoids the client having to pass in a MCExpr for constant integers...
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Streaming machine code generation interface.
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
#define G_00B84C_TGID_X_EN(x)
virtual void SwitchSection(MCSection *Section, const MCExpr *Subsection=nullptr)
Set the current section where code is being emitted to Section.
void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override
uint32_t gds_segment_byte_size
Number of bytes of GDS required by kernel dispatch.
uint8_t private_segment_alignment
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value=0, unsigned ValueSize=1, unsigned MaxBytesToEmit=0)
Emit some number of copies of Value until the byte alignment ByteAlignment is reached.
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
MCSection * getTextSection() const
#define G_00B84C_LDS_SIZE(x)
MCELFStreamer & getStreamer()
#define G_00B84C_TIDIG_COMP_CNT(x)
AMDGPUTargetStreamer(MCStreamer &S)
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
AMDGPUTargetELFStreamer(MCStreamer &S)
#define G_00B848_IEEE_MODE(x)
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override
uint16_t debug_wavefront_private_segment_offset_sgpr
If is_debug_supported is 0 then must be 0.
uint16_t amd_machine_version_stepping
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
uint8_t wavefront_size
Wavefront size expressed as a power of two.
void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
#define G_00B848_PRIORITY(x)
The interleave (swizzle) element size in bytes required by the code for private memory.
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in bytes.
uint64_t kernel_code_prefetch_byte_size
MCSectionELF - This represents a section on linux, lots of unix variants and some bare metal systems...
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instruction.
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override
#define G_00B84C_EXCP_EN(x)
bool PopSection()
Restore the current and previous section from the section stack.
#define G_00B848_DX10_CLAMP(x)
const MCObjectFileInfo * getObjectFileInfo() const
uint16_t amd_machine_kind
uint32_t workgroup_fbarrier_count
Number of fbarriers used in the kernel and all functions it calls.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
uint16_t debug_private_segment_buffer_sgpr
If is_debug_supported is 0 then must be 0.
uint16_t reserved_vgpr_count
The number of consecutive VGPRs reserved by the client.
uint64_t max_scratch_backing_memory_byte_size
Number of bytes of scratch backing memory required for full occupancy of target chip.
uint16_t reserved_vgpr_first
If reserved_vgpr_count is 0 then must be 0.
uint16_t reserved_sgpr_count
The number of consecutive SGPRs reserved by the client.