LLVM  3.7.0
AMDGPUTargetStreamer.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file provides AMDGPU specific target streamer methods.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUTargetStreamer.h"
15 #include "SIDefines.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCELFStreamer.h"
20 #include "llvm/MC/MCSectionELF.h"
21 #include "llvm/Support/ELF.h"
23 
24 using namespace llvm;
25 
27  : MCTargetStreamer(S) { }
28 
29 //===----------------------------------------------------------------------===//
30 // AMDGPUTargetAsmStreamer
31 //===----------------------------------------------------------------------===//
32 
35  : AMDGPUTargetStreamer(S), OS(OS) { }
36 
37 void
39  uint32_t Minor) {
40  OS << "\t.hsa_code_object_version " <<
41  Twine(Major) << "," << Twine(Minor) << '\n';
42 }
43 
44 void
46  uint32_t Minor,
47  uint32_t Stepping,
48  StringRef VendorName,
49  StringRef ArchName) {
50  OS << "\t.hsa_code_object_isa " <<
51  Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
52  ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
53 
54 }
55 
56 void
58  uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
59  bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
61  bool EnableSGPRDispatchPtr = (Header.code_properties &
63  bool EnableSGPRQueuePtr = (Header.code_properties &
65  bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
67  bool EnableSGPRDispatchID = (Header.code_properties &
69  bool EnableSGPRFlatScratchInit = (Header.code_properties &
71  bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
73  bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
75  bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
77  bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
79  bool EnableOrderedAppendGDS = (Header.code_properties &
81  uint32_t PrivateElementSize = (Header.code_properties &
84  bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
85  bool IsDynamicCallstack = (Header.code_properties &
87  bool IsDebugEnabled = (Header.code_properties &
89  bool IsXNackEnabled = (Header.code_properties &
91 
92  OS << "\t.amd_kernel_code_t\n" <<
93  "\t\tkernel_code_version_major = " <<
94  Header.amd_kernel_code_version_major << '\n' <<
95  "\t\tkernel_code_version_minor = " <<
96  Header.amd_kernel_code_version_minor << '\n' <<
97  "\t\tmachine_kind = " <<
98  Header.amd_machine_kind << '\n' <<
99  "\t\tmachine_version_major = " <<
100  Header.amd_machine_version_major << '\n' <<
101  "\t\tmachine_version_minor = " <<
102  Header.amd_machine_version_minor << '\n' <<
103  "\t\tmachine_version_stepping = " <<
104  Header.amd_machine_version_stepping << '\n' <<
105  "\t\tkernel_code_entry_byte_offset = " <<
106  Header.kernel_code_entry_byte_offset << '\n' <<
107  "\t\tkernel_code_prefetch_byte_size = " <<
108  Header.kernel_code_prefetch_byte_size << '\n' <<
109  "\t\tmax_scratch_backing_memory_byte_size = " <<
110  Header.max_scratch_backing_memory_byte_size << '\n' <<
111  "\t\tcompute_pgm_rsrc1_vgprs = " <<
113  "\t\tcompute_pgm_rsrc1_sgprs = " <<
115  "\t\tcompute_pgm_rsrc1_priority = " <<
117  "\t\tcompute_pgm_rsrc1_float_mode = " <<
119  "\t\tcompute_pgm_rsrc1_priv = " <<
121  "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
123  "\t\tcompute_pgm_rsrc1_debug_mode = " <<
125  "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
127  "\t\tcompute_pgm_rsrc2_scratch_en = " <<
128  G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
129  "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
130  G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
131  "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
132  G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
133  "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
134  G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
135  "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
136  G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
137  "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
138  G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
139  "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
140  G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
141  "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
142  G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
143  "\t\tcompute_pgm_rsrc2_lds_size = " <<
144  G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
145  "\t\tcompute_pgm_rsrc2_excp_en = " <<
146  G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
147 
148  "\t\tenable_sgpr_private_segment_buffer = " <<
149  EnableSGPRPrivateSegmentBuffer << '\n' <<
150  "\t\tenable_sgpr_dispatch_ptr = " <<
151  EnableSGPRDispatchPtr << '\n' <<
152  "\t\tenable_sgpr_queue_ptr = " <<
153  EnableSGPRQueuePtr << '\n' <<
154  "\t\tenable_sgpr_kernarg_segment_ptr = " <<
155  EnableSGPRKernargSegmentPtr << '\n' <<
156  "\t\tenable_sgpr_dispatch_id = " <<
157  EnableSGPRDispatchID << '\n' <<
158  "\t\tenable_sgpr_flat_scratch_init = " <<
159  EnableSGPRFlatScratchInit << '\n' <<
160  "\t\tenable_sgpr_private_segment_size = " <<
161  EnableSGPRPrivateSegmentSize << '\n' <<
162  "\t\tenable_sgpr_grid_workgroup_count_x = " <<
163  EnableSGPRGridWorkgroupCountX << '\n' <<
164  "\t\tenable_sgpr_grid_workgroup_count_y = " <<
165  EnableSGPRGridWorkgroupCountY << '\n' <<
166  "\t\tenable_sgpr_grid_workgroup_count_z = " <<
167  EnableSGPRGridWorkgroupCountZ << '\n' <<
168  "\t\tenable_ordered_append_gds = " <<
169  EnableOrderedAppendGDS << '\n' <<
170  "\t\tprivate_element_size = " <<
171  PrivateElementSize << '\n' <<
172  "\t\tis_ptr64 = " <<
173  IsPtr64 << '\n' <<
174  "\t\tis_dynamic_callstack = " <<
175  IsDynamicCallstack << '\n' <<
176  "\t\tis_debug_enabled = " <<
177  IsDebugEnabled << '\n' <<
178  "\t\tis_xnack_enabled = " <<
179  IsXNackEnabled << '\n' <<
180  "\t\tworkitem_private_segment_byte_size = " <<
181  Header.workitem_private_segment_byte_size << '\n' <<
182  "\t\tworkgroup_group_segment_byte_size = " <<
183  Header.workgroup_group_segment_byte_size << '\n' <<
184  "\t\tgds_segment_byte_size = " <<
185  Header.gds_segment_byte_size << '\n' <<
186  "\t\tkernarg_segment_byte_size = " <<
187  Header.kernarg_segment_byte_size << '\n' <<
188  "\t\tworkgroup_fbarrier_count = " <<
189  Header.workgroup_fbarrier_count << '\n' <<
190  "\t\twavefront_sgpr_count = " <<
191  Header.wavefront_sgpr_count << '\n' <<
192  "\t\tworkitem_vgpr_count = " <<
193  Header.workitem_vgpr_count << '\n' <<
194  "\t\treserved_vgpr_first = " <<
195  Header.reserved_vgpr_first << '\n' <<
196  "\t\treserved_vgpr_count = " <<
197  Header.reserved_vgpr_count << '\n' <<
198  "\t\treserved_sgpr_first = " <<
199  Header.reserved_sgpr_first << '\n' <<
200  "\t\treserved_sgpr_count = " <<
201  Header.reserved_sgpr_count << '\n' <<
202  "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
204  "\t\tdebug_private_segment_buffer_sgpr = " <<
205  Header.debug_private_segment_buffer_sgpr << '\n' <<
206  "\t\tkernarg_segment_alignment = " <<
207  (uint32_t)Header.kernarg_segment_alignment << '\n' <<
208  "\t\tgroup_segment_alignment = " <<
209  (uint32_t)Header.group_segment_alignment << '\n' <<
210  "\t\tprivate_segment_alignment = " <<
211  (uint32_t)Header.private_segment_alignment << '\n' <<
212  "\t\twavefront_size = " <<
213  (uint32_t)Header.wavefront_size << '\n' <<
214  "\t\tcall_convention = " <<
215  Header.call_convention << '\n' <<
216  "\t\truntime_loader_kernel_symbol = " <<
217  Header.runtime_loader_kernel_symbol << '\n' <<
218  // TODO: control_directives
219  "\t.end_amd_kernel_code_t\n";
220 
221 }
222 
223 //===----------------------------------------------------------------------===//
224 // AMDGPUTargetELFStreamer
225 //===----------------------------------------------------------------------===//
226 
228  : AMDGPUTargetStreamer(S), Streamer(S) { }
229 
231  return static_cast<MCELFStreamer &>(Streamer);
232 }
233 
234 void
236  uint32_t Minor) {
237  MCStreamer &OS = getStreamer();
238  MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
239 
240  unsigned NameSZ = 4;
241 
242  OS.PushSection();
243  OS.SwitchSection(Note);
244  OS.EmitIntValue(NameSZ, 4); // namesz
245  OS.EmitIntValue(8, 4); // descsz
246  OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
247  OS.EmitBytes(StringRef("AMD", NameSZ)); // name
248  OS.EmitIntValue(Major, 4); // desc
249  OS.EmitIntValue(Minor, 4);
250  OS.EmitValueToAlignment(4);
251  OS.PopSection();
252 }
253 
254 void
256  uint32_t Minor,
257  uint32_t Stepping,
258  StringRef VendorName,
259  StringRef ArchName) {
260  MCStreamer &OS = getStreamer();
261  MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
262 
263  unsigned NameSZ = 4;
264  uint16_t VendorNameSize = VendorName.size() + 1;
265  uint16_t ArchNameSize = ArchName.size() + 1;
266  unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
267  sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
268  VendorNameSize + ArchNameSize;
269 
270  OS.PushSection();
271  OS.SwitchSection(Note);
272  OS.EmitIntValue(NameSZ, 4); // namesz
273  OS.EmitIntValue(DescSZ, 4); // descsz
274  OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type
275  OS.EmitBytes(StringRef("AMD", 4)); // name
276  OS.EmitIntValue(VendorNameSize, 2); // desc
277  OS.EmitIntValue(ArchNameSize, 2);
278  OS.EmitIntValue(Major, 4);
279  OS.EmitIntValue(Minor, 4);
280  OS.EmitIntValue(Stepping, 4);
281  OS.EmitBytes(VendorName);
282  OS.EmitIntValue(0, 1); // NULL terminate VendorName
283  OS.EmitBytes(ArchName);
284  OS.EmitIntValue(0, 1); // NULL terminate ArchName
285  OS.EmitValueToAlignment(4);
286  OS.PopSection();
287 }
288 
289 void
291 
292  MCStreamer &OS = getStreamer();
293  OS.PushSection();
295  OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
296  OS.PopSection();
297 }
#define G_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:152
#define G_00B848_DEBUG_MODE(x)
Definition: SIDefines.h:161
#define G_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:128
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:109
formatted_raw_ostream - A raw_ostream that wraps another one and keeps track of line and column posit...
MCSectionELF * getELFSection(StringRef Section, unsigned Type, unsigned Flags)
Definition: MCContext.h:311
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:118
virtual void EmitBytes(StringRef Data)
Emit the bytes in Data into the output.
Definition: MCStreamer.cpp:681
Target specific streamer interface.
Definition: MCStreamer.h:73
#define G_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:121
void PushSection()
Save the current and previous section on the section stack.
Definition: MCStreamer.h:301
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:115
#define G_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:106
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName) override
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName) override
uint32_t amd_kernel_code_version_major
uint32_t code_properties
Code properties.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
AMD Kernel Code Object (amd_kernel_code_t).
uint16_t amd_machine_version_major
uint16_t reserved_sgpr_first
If reserved_sgpr_count is 0 then must be 0.
MCContext & getContext() const
Definition: MCStreamer.h:210
#define G_00B848_SGPRS(x)
Definition: SIDefines.h:146
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
#define G_00B848_VGPRS(x)
Definition: SIDefines.h:143
uint8_t group_segment_alignment
#define G_00B848_PRIV(x)
Definition: SIDefines.h:155
uint16_t amd_machine_version_minor
uint32_t amd_kernel_code_version_minor
uint64_t runtime_loader_kernel_symbol
virtual void EmitIntValue(uint64_t Value, unsigned Size)
Special case of EmitValue that avoids the client having to pass in a MCExpr for constant integers...
Definition: MCStreamer.cpp:79
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Streaming machine code generation interface.
Definition: MCStreamer.h:157
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:112
virtual void SwitchSection(MCSection *Section, const MCExpr *Subsection=nullptr)
Set the current section where code is being emitted to Section.
Definition: MCStreamer.cpp:701
void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override
uint32_t gds_segment_byte_size
Number of bytes of GDS required by kernel dispatch.
uint8_t private_segment_alignment
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value=0, unsigned ValueSize=1, unsigned MaxBytesToEmit=0)
Emit some number of copies of Value until the byte alignment ByteAlignment is reached.
Definition: MCStreamer.cpp:688
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
MCSection * getTextSection() const
#define G_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:132
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:124
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
#define G_00B848_IEEE_MODE(x)
Definition: SIDefines.h:164
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override
uint16_t debug_wavefront_private_segment_offset_sgpr
If is_debug_supported is 0 then must be 0.
uint16_t amd_machine_version_stepping
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
uint8_t wavefront_size
Wavefront size expressed as a power of two.
void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
#define G_00B848_PRIORITY(x)
Definition: SIDefines.h:149
The interleave (swizzle) element size in bytes required by the code for private memory.
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
uint64_t kernel_code_prefetch_byte_size
MCSectionELF - This represents a section on linux, lots of unix variants and some bare metal systems...
Definition: MCSectionELF.h:30
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instru...
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override
#define G_00B84C_EXCP_EN(x)
Definition: SIDefines.h:135
bool PopSection()
Restore the current and previous section from the section stack.
Definition: MCStreamer.h:310
#define G_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:158
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:229
uint16_t amd_machine_kind
uint32_t workgroup_fbarrier_count
Number of fbarrier's used in the kernel and all functions it calls.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
uint16_t debug_private_segment_buffer_sgpr
If is_debug_supported is 0 then must be 0.
uint16_t reserved_vgpr_count
The number of consecutive VGPRs reserved by the client.
uint64_t max_scratch_backing_memory_byte_size
Number of bytes of scratch backing memory required for full occupancy of target chip.
uint16_t reserved_vgpr_first
If reserved_vgpr_count is 0 then must be 0.
uint16_t reserved_sgpr_count
The number of consecutive SGPRs reserved by the client.