File: | lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp |
Warning: | line 99, column 23 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file provides AMDGPU specific target streamer methods. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "AMDGPUTargetStreamer.h" | |||
14 | #include "AMDGPU.h" | |||
15 | #include "SIDefines.h" | |||
16 | #include "Utils/AMDGPUBaseInfo.h" | |||
17 | #include "Utils/AMDKernelCodeTUtils.h" | |||
18 | #include "llvm/ADT/Twine.h" | |||
19 | #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" | |||
20 | #include "llvm/BinaryFormat/ELF.h" | |||
21 | #include "llvm/IR/Constants.h" | |||
22 | #include "llvm/IR/Function.h" | |||
23 | #include "llvm/IR/Metadata.h" | |||
24 | #include "llvm/IR/Module.h" | |||
25 | #include "llvm/MC/MCContext.h" | |||
26 | #include "llvm/MC/MCELFStreamer.h" | |||
27 | #include "llvm/MC/MCObjectFileInfo.h" | |||
28 | #include "llvm/MC/MCSectionELF.h" | |||
29 | #include "llvm/Support/FormattedStream.h" | |||
30 | #include "llvm/Support/TargetParser.h" | |||
31 | ||||
32 | namespace llvm { | |||
33 | #include "AMDGPUPTNote.h" | |||
34 | } | |||
35 | ||||
36 | using namespace llvm; | |||
37 | using namespace llvm::AMDGPU; | |||
38 | using namespace llvm::AMDGPU::HSAMD; | |||
39 | ||||
40 | //===----------------------------------------------------------------------===// | |||
41 | // AMDGPUTargetStreamer | |||
42 | //===----------------------------------------------------------------------===// | |||
43 | ||||
44 | bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { | |||
45 | HSAMD::Metadata HSAMetadata; | |||
46 | if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) | |||
47 | return false; | |||
48 | ||||
49 | return EmitHSAMetadata(HSAMetadata); | |||
50 | } | |||
51 | ||||
52 | bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { | |||
53 | msgpack::Document HSAMetadataDoc; | |||
54 | if (!HSAMetadataDoc.fromYAML(HSAMetadataString)) | |||
55 | return false; | |||
56 | return EmitHSAMetadata(HSAMetadataDoc, false); | |||
57 | } | |||
58 | ||||
59 | StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { | |||
60 | AMDGPU::GPUKind AK; | |||
| ||||
61 | ||||
62 | switch (ElfMach) { | |||
63 | case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; | |||
64 | case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; | |||
65 | case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; | |||
66 | case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; | |||
67 | case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; | |||
68 | case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; | |||
69 | case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; | |||
70 | case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; | |||
71 | case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; | |||
72 | case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; | |||
73 | case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; | |||
74 | case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; | |||
75 | case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; | |||
76 | case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; | |||
77 | case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; | |||
78 | case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; | |||
79 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; | |||
80 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; | |||
81 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; | |||
82 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; | |||
83 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; | |||
84 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; | |||
85 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; | |||
86 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; | |||
87 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; | |||
88 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; | |||
89 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; | |||
90 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; | |||
91 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; | |||
92 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; | |||
93 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; | |||
94 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; | |||
95 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; | |||
96 | case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; | |||
97 | } | |||
98 | ||||
99 | StringRef GPUName = getArchNameAMDGCN(AK); | |||
| ||||
100 | if (GPUName != "") | |||
101 | return GPUName; | |||
102 | return getArchNameR600(AK); | |||
103 | } | |||
104 | ||||
105 | unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { | |||
106 | AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); | |||
107 | if (AK == AMDGPU::GPUKind::GK_NONE) | |||
108 | AK = parseArchR600(GPU); | |||
109 | ||||
110 | switch (AK) { | |||
111 | case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; | |||
112 | case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; | |||
113 | case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; | |||
114 | case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; | |||
115 | case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; | |||
116 | case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; | |||
117 | case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; | |||
118 | case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; | |||
119 | case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; | |||
120 | case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; | |||
121 | case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; | |||
122 | case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; | |||
123 | case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; | |||
124 | case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; | |||
125 | case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; | |||
126 | case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; | |||
127 | case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; | |||
128 | case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; | |||
129 | case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; | |||
130 | case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; | |||
131 | case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; | |||
132 | case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; | |||
133 | case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; | |||
134 | case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; | |||
135 | case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; | |||
136 | case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; | |||
137 | case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; | |||
138 | case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; | |||
139 | case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; | |||
140 | case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; | |||
141 | case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; | |||
142 | case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; | |||
143 | case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; | |||
144 | case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; | |||
145 | } | |||
146 | ||||
147 | llvm_unreachable("unknown GPU")::llvm::llvm_unreachable_internal("unknown GPU", "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp" , 147); | |||
148 | } | |||
149 | ||||
150 | //===----------------------------------------------------------------------===// | |||
151 | // AMDGPUTargetAsmStreamer | |||
152 | //===----------------------------------------------------------------------===// | |||
153 | ||||
154 | AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, | |||
155 | formatted_raw_ostream &OS) | |||
156 | : AMDGPUTargetStreamer(S), OS(OS) { } | |||
157 | ||||
158 | // A hook for emitting stuff at the end. | |||
159 | // We use it for emitting the accumulated PAL metadata as directives. | |||
160 | void AMDGPUTargetAsmStreamer::finish() { | |||
161 | std::string S; | |||
162 | getPALMetadata()->toString(S); | |||
163 | OS << S; | |||
164 | } | |||
165 | ||||
166 | void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) { | |||
167 | OS << "\t.amdgcn_target \"" << Target << "\"\n"; | |||
168 | } | |||
169 | ||||
170 | void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion( | |||
171 | uint32_t Major, uint32_t Minor) { | |||
172 | OS << "\t.hsa_code_object_version " << | |||
173 | Twine(Major) << "," << Twine(Minor) << '\n'; | |||
174 | } | |||
175 | ||||
176 | void | |||
177 | AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, | |||
178 | uint32_t Minor, | |||
179 | uint32_t Stepping, | |||
180 | StringRef VendorName, | |||
181 | StringRef ArchName) { | |||
182 | OS << "\t.hsa_code_object_isa " << | |||
183 | Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) << | |||
184 | ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; | |||
185 | ||||
186 | } | |||
187 | ||||
188 | void | |||
189 | AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { | |||
190 | OS << "\t.amd_kernel_code_t\n"; | |||
191 | dumpAmdKernelCode(&Header, OS, "\t\t"); | |||
192 | OS << "\t.end_amd_kernel_code_t\n"; | |||
193 | } | |||
194 | ||||
195 | void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, | |||
196 | unsigned Type) { | |||
197 | switch (Type) { | |||
198 | default: llvm_unreachable("Invalid AMDGPU symbol type")::llvm::llvm_unreachable_internal("Invalid AMDGPU symbol type" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp" , 198); | |||
199 | case ELF::STT_AMDGPU_HSA_KERNEL: | |||
200 | OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; | |||
201 | break; | |||
202 | } | |||
203 | } | |||
204 | ||||
205 | bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { | |||
206 | OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n"; | |||
207 | return true; | |||
208 | } | |||
209 | ||||
210 | bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( | |||
211 | const AMDGPU::HSAMD::Metadata &HSAMetadata) { | |||
212 | std::string HSAMetadataString; | |||
213 | if (HSAMD::toString(HSAMetadata, HSAMetadataString)) | |||
214 | return false; | |||
215 | ||||
216 | OS << '\t' << AssemblerDirectiveBegin << '\n'; | |||
217 | OS << HSAMetadataString << '\n'; | |||
218 | OS << '\t' << AssemblerDirectiveEnd << '\n'; | |||
219 | return true; | |||
220 | } | |||
221 | ||||
222 | bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( | |||
223 | msgpack::Document &HSAMetadataDoc, bool Strict) { | |||
224 | V3::MetadataVerifier Verifier(Strict); | |||
225 | if (!Verifier.verify(HSAMetadataDoc.getRoot())) | |||
226 | return false; | |||
227 | ||||
228 | std::string HSAMetadataString; | |||
229 | raw_string_ostream StrOS(HSAMetadataString); | |||
230 | HSAMetadataDoc.toYAML(StrOS); | |||
231 | ||||
232 | OS << '\t' << V3::AssemblerDirectiveBegin << '\n'; | |||
233 | OS << StrOS.str() << '\n'; | |||
234 | OS << '\t' << V3::AssemblerDirectiveEnd << '\n'; | |||
235 | return true; | |||
236 | } | |||
237 | ||||
238 | bool AMDGPUTargetAsmStreamer::EmitCodeEnd() { | |||
239 | const uint32_t Encoded_s_code_end = 0xbf9f0000; | |||
240 | OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n'; | |||
241 | OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n'; | |||
242 | return true; | |||
243 | } | |||
244 | ||||
245 | void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( | |||
246 | const MCSubtargetInfo &STI, StringRef KernelName, | |||
247 | const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, | |||
248 | bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { | |||
249 | IsaVersion IVersion = getIsaVersion(STI.getCPU()); | |||
250 | ||||
251 | OS << "\t.amdhsa_kernel " << KernelName << '\n'; | |||
252 | ||||
253 | #define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \ | |||
254 | STREAM << "\t\t" << DIRECTIVE << " " \ | |||
255 | << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME)((KERNEL_DESC.MEMBER_NAME & FIELD_NAME) >> FIELD_NAME_SHIFT ) << '\n'; | |||
256 | ||||
257 | OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size | |||
258 | << '\n'; | |||
259 | OS << "\t\t.amdhsa_private_segment_fixed_size " | |||
260 | << KD.private_segment_fixed_size << '\n'; | |||
261 | ||||
262 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, | |||
263 | kernel_code_properties, | |||
264 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); | |||
265 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, | |||
266 | kernel_code_properties, | |||
267 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); | |||
268 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD, | |||
269 | kernel_code_properties, | |||
270 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); | |||
271 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, | |||
272 | kernel_code_properties, | |||
273 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); | |||
274 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD, | |||
275 | kernel_code_properties, | |||
276 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); | |||
277 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, | |||
278 | kernel_code_properties, | |||
279 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); | |||
280 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, | |||
281 | kernel_code_properties, | |||
282 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); | |||
283 | PRINT_FIELD( | |||
284 | OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, | |||
285 | compute_pgm_rsrc2, | |||
286 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); | |||
287 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, | |||
288 | compute_pgm_rsrc2, | |||
289 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); | |||
290 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, | |||
291 | compute_pgm_rsrc2, | |||
292 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); | |||
293 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, | |||
294 | compute_pgm_rsrc2, | |||
295 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); | |||
296 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD, | |||
297 | compute_pgm_rsrc2, | |||
298 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); | |||
299 | PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD, | |||
300 | compute_pgm_rsrc2, | |||
301 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); | |||
302 | ||||
303 | // These directives are required. | |||
304 | OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n'; | |||
305 | OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n'; | |||
306 | ||||
307 | if (!ReserveVCC) | |||
308 | OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; | |||
309 | if (IVersion.Major >= 7 && !ReserveFlatScr) | |||
310 | OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; | |||
311 | if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI)) | |||
312 | OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n'; | |||
313 | ||||
314 | PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, | |||
315 | compute_pgm_rsrc1, | |||
316 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); | |||
317 | PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD, | |||
318 | compute_pgm_rsrc1, | |||
319 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); | |||
320 | PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD, | |||
321 | compute_pgm_rsrc1, | |||
322 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); | |||
323 | PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD, | |||
324 | compute_pgm_rsrc1, | |||
325 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); | |||
326 | PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, | |||
327 | compute_pgm_rsrc1, | |||
328 | amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); | |||
329 | PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, | |||
330 | compute_pgm_rsrc1, | |||
331 | amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); | |||
332 | if (IVersion.Major >= 9) | |||
333 | PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, | |||
334 | compute_pgm_rsrc1, | |||
335 | amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL); | |||
336 | if (IVersion.Major >= 10) { | |||
337 | PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD, | |||
338 | compute_pgm_rsrc1, | |||
339 | amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE); | |||
340 | PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD, | |||
341 | compute_pgm_rsrc1, | |||
342 | amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED); | |||
343 | PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, | |||
344 | compute_pgm_rsrc1, | |||
345 | amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS); | |||
346 | } | |||
347 | PRINT_FIELD( | |||
348 | OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, | |||
349 | compute_pgm_rsrc2, | |||
350 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); | |||
351 | PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD, | |||
352 | compute_pgm_rsrc2, | |||
353 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); | |||
354 | PRINT_FIELD( | |||
355 | OS, ".amdhsa_exception_fp_ieee_div_zero", KD, | |||
356 | compute_pgm_rsrc2, | |||
357 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); | |||
358 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD, | |||
359 | compute_pgm_rsrc2, | |||
360 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); | |||
361 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD, | |||
362 | compute_pgm_rsrc2, | |||
363 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); | |||
364 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD, | |||
365 | compute_pgm_rsrc2, | |||
366 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); | |||
367 | PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD, | |||
368 | compute_pgm_rsrc2, | |||
369 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); | |||
370 | #undef PRINT_FIELD | |||
371 | ||||
372 | OS << "\t.end_amdhsa_kernel\n"; | |||
373 | } | |||
374 | ||||
375 | //===----------------------------------------------------------------------===// | |||
376 | // AMDGPUTargetELFStreamer | |||
377 | //===----------------------------------------------------------------------===// | |||
378 | ||||
379 | AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( | |||
380 | MCStreamer &S, const MCSubtargetInfo &STI) | |||
381 | : AMDGPUTargetStreamer(S), Streamer(S) { | |||
382 | MCAssembler &MCA = getStreamer().getAssembler(); | |||
383 | unsigned EFlags = MCA.getELFHeaderEFlags(); | |||
384 | ||||
385 | EFlags &= ~ELF::EF_AMDGPU_MACH; | |||
386 | EFlags |= getElfMach(STI.getCPU()); | |||
387 | ||||
388 | EFlags &= ~ELF::EF_AMDGPU_XNACK; | |||
389 | if (AMDGPU::hasXNACK(STI)) | |||
390 | EFlags |= ELF::EF_AMDGPU_XNACK; | |||
391 | ||||
392 | EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC; | |||
393 | if (AMDGPU::hasSRAMECC(STI)) | |||
394 | EFlags |= ELF::EF_AMDGPU_SRAM_ECC; | |||
395 | ||||
396 | MCA.setELFHeaderEFlags(EFlags); | |||
397 | } | |||
398 | ||||
399 | MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { | |||
400 | return static_cast<MCELFStreamer &>(Streamer); | |||
401 | } | |||
402 | ||||
403 | // A hook for emitting stuff at the end. | |||
404 | // We use it for emitting the accumulated PAL metadata as a .note record. | |||
405 | void AMDGPUTargetELFStreamer::finish() { | |||
406 | std::string Blob; | |||
407 | const char *Vendor = getPALMetadata()->getVendor(); | |||
408 | unsigned Type = getPALMetadata()->getType(); | |||
409 | getPALMetadata()->toBlob(Type, Blob); | |||
410 | if (Blob.empty()) | |||
411 | return; | |||
412 | EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type, | |||
413 | [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); }); | |||
414 | } | |||
415 | ||||
416 | void AMDGPUTargetELFStreamer::EmitNote( | |||
417 | StringRef Name, const MCExpr *DescSZ, unsigned NoteType, | |||
418 | function_ref<void(MCELFStreamer &)> EmitDesc) { | |||
419 | auto &S = getStreamer(); | |||
420 | auto &Context = S.getContext(); | |||
421 | ||||
422 | auto NameSZ = Name.size() + 1; | |||
423 | ||||
424 | S.PushSection(); | |||
425 | S.SwitchSection(Context.getELFSection( | |||
426 | ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); | |||
427 | S.EmitIntValue(NameSZ, 4); // namesz | |||
428 | S.EmitValue(DescSZ, 4); // descz | |||
429 | S.EmitIntValue(NoteType, 4); // type | |||
430 | S.EmitBytes(Name); // name | |||
431 | S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 | |||
432 | EmitDesc(S); // desc | |||
433 | S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 | |||
434 | S.PopSection(); | |||
435 | } | |||
436 | ||||
437 | void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {} | |||
438 | ||||
439 | void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( | |||
440 | uint32_t Major, uint32_t Minor) { | |||
441 | ||||
442 | EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), | |||
443 | ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { | |||
444 | OS.EmitIntValue(Major, 4); | |||
445 | OS.EmitIntValue(Minor, 4); | |||
446 | }); | |||
447 | } | |||
448 | ||||
449 | void | |||
450 | AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, | |||
451 | uint32_t Minor, | |||
452 | uint32_t Stepping, | |||
453 | StringRef VendorName, | |||
454 | StringRef ArchName) { | |||
455 | uint16_t VendorNameSize = VendorName.size() + 1; | |||
456 | uint16_t ArchNameSize = ArchName.size() + 1; | |||
457 | ||||
458 | unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + | |||
459 | sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + | |||
460 | VendorNameSize + ArchNameSize; | |||
461 | ||||
462 | EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), | |||
463 | ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { | |||
464 | OS.EmitIntValue(VendorNameSize, 2); | |||
465 | OS.EmitIntValue(ArchNameSize, 2); | |||
466 | OS.EmitIntValue(Major, 4); | |||
467 | OS.EmitIntValue(Minor, 4); | |||
468 | OS.EmitIntValue(Stepping, 4); | |||
469 | OS.EmitBytes(VendorName); | |||
470 | OS.EmitIntValue(0, 1); // NULL terminate VendorName | |||
471 | OS.EmitBytes(ArchName); | |||
472 | OS.EmitIntValue(0, 1); // NULL terminte ArchName | |||
473 | }); | |||
474 | } | |||
475 | ||||
476 | void | |||
477 | AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { | |||
478 | ||||
479 | MCStreamer &OS = getStreamer(); | |||
480 | OS.PushSection(); | |||
481 | OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header))); | |||
482 | OS.PopSection(); | |||
483 | } | |||
484 | ||||
485 | void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, | |||
486 | unsigned Type) { | |||
487 | MCSymbolELF *Symbol = cast<MCSymbolELF>( | |||
488 | getStreamer().getContext().getOrCreateSymbol(SymbolName)); | |||
489 | Symbol->setType(Type); | |||
490 | } | |||
491 | ||||
492 | bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { | |||
493 | // Create two labels to mark the beginning and end of the desc field | |||
494 | // and a MCExpr to calculate the size of the desc field. | |||
495 | auto &Context = getContext(); | |||
496 | auto *DescBegin = Context.createTempSymbol(); | |||
497 | auto *DescEnd = Context.createTempSymbol(); | |||
498 | auto *DescSZ = MCBinaryExpr::createSub( | |||
499 | MCSymbolRefExpr::create(DescEnd, Context), | |||
500 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
501 | ||||
502 | EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, | |||
503 | [&](MCELFStreamer &OS) { | |||
504 | OS.EmitLabel(DescBegin); | |||
505 | OS.EmitBytes(IsaVersionString); | |||
506 | OS.EmitLabel(DescEnd); | |||
507 | }); | |||
508 | return true; | |||
509 | } | |||
510 | ||||
511 | bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc, | |||
512 | bool Strict) { | |||
513 | V3::MetadataVerifier Verifier(Strict); | |||
514 | if (!Verifier.verify(HSAMetadataDoc.getRoot())) | |||
515 | return false; | |||
516 | ||||
517 | std::string HSAMetadataString; | |||
518 | HSAMetadataDoc.writeToBlob(HSAMetadataString); | |||
519 | ||||
520 | // Create two labels to mark the beginning and end of the desc field | |||
521 | // and a MCExpr to calculate the size of the desc field. | |||
522 | auto &Context = getContext(); | |||
523 | auto *DescBegin = Context.createTempSymbol(); | |||
524 | auto *DescEnd = Context.createTempSymbol(); | |||
525 | auto *DescSZ = MCBinaryExpr::createSub( | |||
526 | MCSymbolRefExpr::create(DescEnd, Context), | |||
527 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
528 | ||||
529 | EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, | |||
530 | [&](MCELFStreamer &OS) { | |||
531 | OS.EmitLabel(DescBegin); | |||
532 | OS.EmitBytes(HSAMetadataString); | |||
533 | OS.EmitLabel(DescEnd); | |||
534 | }); | |||
535 | return true; | |||
536 | } | |||
537 | ||||
538 | bool AMDGPUTargetELFStreamer::EmitHSAMetadata( | |||
539 | const AMDGPU::HSAMD::Metadata &HSAMetadata) { | |||
540 | std::string HSAMetadataString; | |||
541 | if (HSAMD::toString(HSAMetadata, HSAMetadataString)) | |||
542 | return false; | |||
543 | ||||
544 | // Create two labels to mark the beginning and end of the desc field | |||
545 | // and a MCExpr to calculate the size of the desc field. | |||
546 | auto &Context = getContext(); | |||
547 | auto *DescBegin = Context.createTempSymbol(); | |||
548 | auto *DescEnd = Context.createTempSymbol(); | |||
549 | auto *DescSZ = MCBinaryExpr::createSub( | |||
550 | MCSymbolRefExpr::create(DescEnd, Context), | |||
551 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
552 | ||||
553 | EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, | |||
554 | [&](MCELFStreamer &OS) { | |||
555 | OS.EmitLabel(DescBegin); | |||
556 | OS.EmitBytes(HSAMetadataString); | |||
557 | OS.EmitLabel(DescEnd); | |||
558 | }); | |||
559 | return true; | |||
560 | } | |||
561 | ||||
562 | bool AMDGPUTargetELFStreamer::EmitCodeEnd() { | |||
563 | const uint32_t Encoded_s_code_end = 0xbf9f0000; | |||
564 | ||||
565 | MCStreamer &OS = getStreamer(); | |||
566 | OS.PushSection(); | |||
567 | OS.EmitValueToAlignment(64, Encoded_s_code_end, 4); | |||
568 | for (unsigned I = 0; I < 32; ++I) | |||
569 | OS.EmitIntValue(Encoded_s_code_end, 4); | |||
570 | OS.PopSection(); | |||
571 | return true; | |||
572 | } | |||
573 | ||||
574 | void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( | |||
575 | const MCSubtargetInfo &STI, StringRef KernelName, | |||
576 | const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, | |||
577 | uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, | |||
578 | bool ReserveXNACK) { | |||
579 | auto &Streamer = getStreamer(); | |||
580 | auto &Context = Streamer.getContext(); | |||
581 | ||||
582 | MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>( | |||
583 | Context.getOrCreateSymbol(Twine(KernelName))); | |||
584 | MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>( | |||
585 | Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd"))); | |||
586 | ||||
587 | // Copy kernel descriptor symbol's binding, other and visibility from the | |||
588 | // kernel code symbol. | |||
589 | KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding()); | |||
590 | KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther()); | |||
591 | KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility()); | |||
592 | // Kernel descriptor symbol's type and size are fixed. | |||
593 | KernelDescriptorSymbol->setType(ELF::STT_OBJECT); | |||
594 | KernelDescriptorSymbol->setSize( | |||
595 | MCConstantExpr::create(sizeof(KernelDescriptor), Context)); | |||
596 | ||||
597 | // The visibility of the kernel code symbol must be protected or less to allow | |||
598 | // static relocations from the kernel descriptor to be used. | |||
599 | if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT) | |||
600 | KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); | |||
601 | ||||
602 | Streamer.EmitLabel(KernelDescriptorSymbol); | |||
603 | Streamer.EmitBytes(StringRef( | |||
604 | (const char*)&(KernelDescriptor), | |||
605 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ))); | |||
606 | // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The | |||
607 | // expression being created is: | |||
608 | // (start of kernel code) - (start of kernel descriptor) | |||
609 | // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. | |||
610 | Streamer.EmitValue(MCBinaryExpr::createSub( | |||
611 | MCSymbolRefExpr::create( | |||
612 | KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context), | |||
613 | MCSymbolRefExpr::create( | |||
614 | KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), | |||
615 | Context), | |||
616 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); | |||
617 | Streamer.EmitBytes(StringRef( | |||
618 | (const char*)&(KernelDescriptor) + | |||
619 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ) + | |||
620 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset), | |||
621 | sizeof(KernelDescriptor) - | |||
622 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ) - | |||
623 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset))); | |||
624 | } |