File: | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp |
Warning: | line 102, column 23 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file provides AMDGPU specific target streamer methods. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "AMDGPUTargetStreamer.h" | |||
14 | #include "AMDGPU.h" | |||
15 | #include "SIDefines.h" | |||
16 | #include "Utils/AMDGPUBaseInfo.h" | |||
17 | #include "Utils/AMDKernelCodeTUtils.h" | |||
18 | #include "llvm/ADT/Twine.h" | |||
19 | #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" | |||
20 | #include "llvm/BinaryFormat/ELF.h" | |||
21 | #include "llvm/IR/Constants.h" | |||
22 | #include "llvm/IR/Function.h" | |||
23 | #include "llvm/IR/Metadata.h" | |||
24 | #include "llvm/IR/Module.h" | |||
25 | #include "llvm/MC/MCContext.h" | |||
26 | #include "llvm/MC/MCELFStreamer.h" | |||
27 | #include "llvm/MC/MCObjectFileInfo.h" | |||
28 | #include "llvm/MC/MCSectionELF.h" | |||
29 | #include "llvm/Support/FormattedStream.h" | |||
30 | #include "llvm/Support/TargetParser.h" | |||
31 | ||||
32 | namespace llvm { | |||
33 | #include "AMDGPUPTNote.h" | |||
34 | } | |||
35 | ||||
36 | using namespace llvm; | |||
37 | using namespace llvm::AMDGPU; | |||
38 | using namespace llvm::AMDGPU::HSAMD; | |||
39 | ||||
40 | //===----------------------------------------------------------------------===// | |||
41 | // AMDGPUTargetStreamer | |||
42 | //===----------------------------------------------------------------------===// | |||
43 | ||||
44 | bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { | |||
45 | HSAMD::Metadata HSAMetadata; | |||
46 | if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) | |||
47 | return false; | |||
48 | ||||
49 | return EmitHSAMetadata(HSAMetadata); | |||
50 | } | |||
51 | ||||
52 | bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { | |||
53 | msgpack::Document HSAMetadataDoc; | |||
54 | if (!HSAMetadataDoc.fromYAML(HSAMetadataString)) | |||
55 | return false; | |||
56 | return EmitHSAMetadata(HSAMetadataDoc, false); | |||
57 | } | |||
58 | ||||
59 | StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { | |||
60 | AMDGPU::GPUKind AK; | |||
| ||||
61 | ||||
62 | switch (ElfMach) { | |||
63 | case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; | |||
64 | case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; | |||
65 | case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; | |||
66 | case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; | |||
67 | case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; | |||
68 | case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; | |||
69 | case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; | |||
70 | case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; | |||
71 | case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; | |||
72 | case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; | |||
73 | case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; | |||
74 | case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; | |||
75 | case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; | |||
76 | case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; | |||
77 | case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; | |||
78 | case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; | |||
79 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; | |||
80 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; | |||
81 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; | |||
82 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; | |||
83 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; | |||
84 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; | |||
85 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; | |||
86 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; | |||
87 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; | |||
88 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; | |||
89 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; | |||
90 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; | |||
91 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; | |||
92 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; | |||
93 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; | |||
94 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break; | |||
95 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; | |||
96 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; | |||
97 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; | |||
98 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; | |||
99 | case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; | |||
100 | } | |||
101 | ||||
102 | StringRef GPUName = getArchNameAMDGCN(AK); | |||
| ||||
103 | if (GPUName != "") | |||
104 | return GPUName; | |||
105 | return getArchNameR600(AK); | |||
106 | } | |||
107 | ||||
108 | unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { | |||
109 | AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); | |||
110 | if (AK == AMDGPU::GPUKind::GK_NONE) | |||
111 | AK = parseArchR600(GPU); | |||
112 | ||||
113 | switch (AK) { | |||
114 | case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; | |||
115 | case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; | |||
116 | case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; | |||
117 | case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; | |||
118 | case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; | |||
119 | case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; | |||
120 | case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; | |||
121 | case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; | |||
122 | case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; | |||
123 | case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; | |||
124 | case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; | |||
125 | case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; | |||
126 | case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; | |||
127 | case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; | |||
128 | case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; | |||
129 | case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; | |||
130 | case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; | |||
131 | case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; | |||
132 | case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; | |||
133 | case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; | |||
134 | case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; | |||
135 | case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; | |||
136 | case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; | |||
137 | case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; | |||
138 | case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; | |||
139 | case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; | |||
140 | case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; | |||
141 | case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; | |||
142 | case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; | |||
143 | case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; | |||
144 | case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; | |||
145 | case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908; | |||
146 | case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; | |||
147 | case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; | |||
148 | case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; | |||
149 | case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; | |||
150 | case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; | |||
151 | } | |||
152 | ||||
153 | llvm_unreachable("unknown GPU")::llvm::llvm_unreachable_internal("unknown GPU", "/build/llvm-toolchain-snapshot-10~++20200106111110+0efc9e5a8cc/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp" , 153); | |||
154 | } | |||
155 | ||||
156 | //===----------------------------------------------------------------------===// | |||
157 | // AMDGPUTargetAsmStreamer | |||
158 | //===----------------------------------------------------------------------===// | |||
159 | ||||
160 | AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, | |||
161 | formatted_raw_ostream &OS) | |||
162 | : AMDGPUTargetStreamer(S), OS(OS) { } | |||
163 | ||||
164 | // A hook for emitting stuff at the end. | |||
165 | // We use it for emitting the accumulated PAL metadata as directives. | |||
166 | void AMDGPUTargetAsmStreamer::finish() { | |||
167 | std::string S; | |||
168 | getPALMetadata()->toString(S); | |||
169 | OS << S; | |||
170 | } | |||
171 | ||||
172 | void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) { | |||
173 | OS << "\t.amdgcn_target \"" << Target << "\"\n"; | |||
174 | } | |||
175 | ||||
176 | void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion( | |||
177 | uint32_t Major, uint32_t Minor) { | |||
178 | OS << "\t.hsa_code_object_version " << | |||
179 | Twine(Major) << "," << Twine(Minor) << '\n'; | |||
180 | } | |||
181 | ||||
182 | void | |||
183 | AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, | |||
184 | uint32_t Minor, | |||
185 | uint32_t Stepping, | |||
186 | StringRef VendorName, | |||
187 | StringRef ArchName) { | |||
188 | OS << "\t.hsa_code_object_isa " << | |||
189 | Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) << | |||
190 | ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; | |||
191 | ||||
192 | } | |||
193 | ||||
194 | void | |||
195 | AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { | |||
196 | OS << "\t.amd_kernel_code_t\n"; | |||
197 | dumpAmdKernelCode(&Header, OS, "\t\t"); | |||
198 | OS << "\t.end_amd_kernel_code_t\n"; | |||
199 | } | |||
200 | ||||
201 | void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, | |||
202 | unsigned Type) { | |||
203 | switch (Type) { | |||
204 | default: llvm_unreachable("Invalid AMDGPU symbol type")::llvm::llvm_unreachable_internal("Invalid AMDGPU symbol type" , "/build/llvm-toolchain-snapshot-10~++20200106111110+0efc9e5a8cc/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp" , 204); | |||
205 | case ELF::STT_AMDGPU_HSA_KERNEL: | |||
206 | OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; | |||
207 | break; | |||
208 | } | |||
209 | } | |||
210 | ||||
211 | void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, | |||
212 | unsigned Align) { | |||
213 | OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align | |||
214 | << '\n'; | |||
215 | } | |||
216 | ||||
217 | bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { | |||
218 | OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n"; | |||
219 | return true; | |||
220 | } | |||
221 | ||||
222 | bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( | |||
223 | const AMDGPU::HSAMD::Metadata &HSAMetadata) { | |||
224 | std::string HSAMetadataString; | |||
225 | if (HSAMD::toString(HSAMetadata, HSAMetadataString)) | |||
226 | return false; | |||
227 | ||||
228 | OS << '\t' << AssemblerDirectiveBegin << '\n'; | |||
229 | OS << HSAMetadataString << '\n'; | |||
230 | OS << '\t' << AssemblerDirectiveEnd << '\n'; | |||
231 | return true; | |||
232 | } | |||
233 | ||||
234 | bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( | |||
235 | msgpack::Document &HSAMetadataDoc, bool Strict) { | |||
236 | V3::MetadataVerifier Verifier(Strict); | |||
237 | if (!Verifier.verify(HSAMetadataDoc.getRoot())) | |||
238 | return false; | |||
239 | ||||
240 | std::string HSAMetadataString; | |||
241 | raw_string_ostream StrOS(HSAMetadataString); | |||
242 | HSAMetadataDoc.toYAML(StrOS); | |||
243 | ||||
244 | OS << '\t' << V3::AssemblerDirectiveBegin << '\n'; | |||
245 | OS << StrOS.str() << '\n'; | |||
246 | OS << '\t' << V3::AssemblerDirectiveEnd << '\n'; | |||
247 | return true; | |||
248 | } | |||
249 | ||||
250 | bool AMDGPUTargetAsmStreamer::EmitCodeEnd() { | |||
251 | const uint32_t Encoded_s_code_end = 0xbf9f0000; | |||
252 | OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n'; | |||
253 | OS << "\t.fill 48, 4, " << Encoded_s_code_end << '\n'; | |||
254 | return true; | |||
255 | } | |||
256 | ||||
257 | void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( | |||
258 | const MCSubtargetInfo &STI, StringRef KernelName, | |||
259 | const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, | |||
260 | bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { | |||
261 | IsaVersion IVersion = getIsaVersion(STI.getCPU()); | |||
262 | ||||
263 | OS << "\t.amdhsa_kernel " << KernelName << '\n'; | |||
264 | ||||
265 | #define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \ | |||
266 | STREAM << "\t\t" << DIRECTIVE << " " \ | |||
267 | << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME)((KERNEL_DESC.MEMBER_NAME & FIELD_NAME) >> FIELD_NAME_SHIFT ) << '\n'; | |||
268 | ||||
269 | OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size | |||
270 | << '\n'; | |||
271 | OS << "\t\t.amdhsa_private_segment_fixed_size " | |||
272 | << KD.private_segment_fixed_size << '\n'; | |||
273 | ||||
274 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, | |||
275 | kernel_code_properties, | |||
276 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); | |||
277 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, | |||
278 | kernel_code_properties, | |||
279 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); | |||
280 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD, | |||
281 | kernel_code_properties, | |||
282 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); | |||
283 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, | |||
284 | kernel_code_properties, | |||
285 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); | |||
286 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD, | |||
287 | kernel_code_properties, | |||
288 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); | |||
289 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, | |||
290 | kernel_code_properties, | |||
291 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); | |||
292 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, | |||
293 | kernel_code_properties, | |||
294 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); | |||
295 | if (IVersion.Major >= 10) | |||
296 | PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, | |||
297 | kernel_code_properties, | |||
298 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); | |||
299 | PRINT_FIELD( | |||
300 | OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, | |||
301 | compute_pgm_rsrc2, | |||
302 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); | |||
303 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, | |||
304 | compute_pgm_rsrc2, | |||
305 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); | |||
306 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, | |||
307 | compute_pgm_rsrc2, | |||
308 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); | |||
309 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, | |||
310 | compute_pgm_rsrc2, | |||
311 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); | |||
312 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD, | |||
313 | compute_pgm_rsrc2, | |||
314 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); | |||
315 | PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD, | |||
316 | compute_pgm_rsrc2, | |||
317 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); | |||
318 | ||||
319 | // These directives are required. | |||
320 | OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n'; | |||
321 | OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n'; | |||
322 | ||||
323 | if (!ReserveVCC) | |||
324 | OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; | |||
325 | if (IVersion.Major >= 7 && !ReserveFlatScr) | |||
326 | OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; | |||
327 | if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI)) | |||
328 | OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n'; | |||
329 | ||||
330 | PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, | |||
331 | compute_pgm_rsrc1, | |||
332 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); | |||
333 | PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD, | |||
334 | compute_pgm_rsrc1, | |||
335 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); | |||
336 | PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD, | |||
337 | compute_pgm_rsrc1, | |||
338 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); | |||
339 | PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD, | |||
340 | compute_pgm_rsrc1, | |||
341 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); | |||
342 | PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, | |||
343 | compute_pgm_rsrc1, | |||
344 | amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); | |||
345 | PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, | |||
346 | compute_pgm_rsrc1, | |||
347 | amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); | |||
348 | if (IVersion.Major >= 9) | |||
349 | PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, | |||
350 | compute_pgm_rsrc1, | |||
351 | amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL); | |||
352 | if (IVersion.Major >= 10) { | |||
353 | PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD, | |||
354 | compute_pgm_rsrc1, | |||
355 | amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE); | |||
356 | PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD, | |||
357 | compute_pgm_rsrc1, | |||
358 | amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED); | |||
359 | PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, | |||
360 | compute_pgm_rsrc1, | |||
361 | amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS); | |||
362 | } | |||
363 | PRINT_FIELD( | |||
364 | OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, | |||
365 | compute_pgm_rsrc2, | |||
366 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); | |||
367 | PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD, | |||
368 | compute_pgm_rsrc2, | |||
369 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); | |||
370 | PRINT_FIELD( | |||
371 | OS, ".amdhsa_exception_fp_ieee_div_zero", KD, | |||
372 | compute_pgm_rsrc2, | |||
373 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); | |||
374 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD, | |||
375 | compute_pgm_rsrc2, | |||
376 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); | |||
377 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD, | |||
378 | compute_pgm_rsrc2, | |||
379 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); | |||
380 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD, | |||
381 | compute_pgm_rsrc2, | |||
382 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); | |||
383 | PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD, | |||
384 | compute_pgm_rsrc2, | |||
385 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); | |||
386 | #undef PRINT_FIELD | |||
387 | ||||
388 | OS << "\t.end_amdhsa_kernel\n"; | |||
389 | } | |||
390 | ||||
391 | //===----------------------------------------------------------------------===// | |||
392 | // AMDGPUTargetELFStreamer | |||
393 | //===----------------------------------------------------------------------===// | |||
394 | ||||
395 | AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( | |||
396 | MCStreamer &S, const MCSubtargetInfo &STI) | |||
397 | : AMDGPUTargetStreamer(S), Streamer(S) { | |||
398 | MCAssembler &MCA = getStreamer().getAssembler(); | |||
399 | unsigned EFlags = MCA.getELFHeaderEFlags(); | |||
400 | ||||
401 | EFlags &= ~ELF::EF_AMDGPU_MACH; | |||
402 | EFlags |= getElfMach(STI.getCPU()); | |||
403 | ||||
404 | EFlags &= ~ELF::EF_AMDGPU_XNACK; | |||
405 | if (AMDGPU::hasXNACK(STI)) | |||
406 | EFlags |= ELF::EF_AMDGPU_XNACK; | |||
407 | ||||
408 | EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC; | |||
409 | if (AMDGPU::hasSRAMECC(STI)) | |||
410 | EFlags |= ELF::EF_AMDGPU_SRAM_ECC; | |||
411 | ||||
412 | MCA.setELFHeaderEFlags(EFlags); | |||
413 | } | |||
414 | ||||
415 | MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { | |||
416 | return static_cast<MCELFStreamer &>(Streamer); | |||
417 | } | |||
418 | ||||
419 | // A hook for emitting stuff at the end. | |||
420 | // We use it for emitting the accumulated PAL metadata as a .note record. | |||
421 | void AMDGPUTargetELFStreamer::finish() { | |||
422 | std::string Blob; | |||
423 | const char *Vendor = getPALMetadata()->getVendor(); | |||
424 | unsigned Type = getPALMetadata()->getType(); | |||
425 | getPALMetadata()->toBlob(Type, Blob); | |||
426 | if (Blob.empty()) | |||
427 | return; | |||
428 | EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type, | |||
429 | [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); }); | |||
430 | } | |||
431 | ||||
432 | void AMDGPUTargetELFStreamer::EmitNote( | |||
433 | StringRef Name, const MCExpr *DescSZ, unsigned NoteType, | |||
434 | function_ref<void(MCELFStreamer &)> EmitDesc) { | |||
435 | auto &S = getStreamer(); | |||
436 | auto &Context = S.getContext(); | |||
437 | ||||
438 | auto NameSZ = Name.size() + 1; | |||
439 | ||||
440 | S.PushSection(); | |||
441 | S.SwitchSection(Context.getELFSection( | |||
442 | ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); | |||
443 | S.EmitIntValue(NameSZ, 4); // namesz | |||
444 | S.EmitValue(DescSZ, 4); // descz | |||
445 | S.EmitIntValue(NoteType, 4); // type | |||
446 | S.EmitBytes(Name); // name | |||
447 | S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 | |||
448 | EmitDesc(S); // desc | |||
449 | S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 | |||
450 | S.PopSection(); | |||
451 | } | |||
452 | ||||
453 | void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {} | |||
454 | ||||
455 | void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( | |||
456 | uint32_t Major, uint32_t Minor) { | |||
457 | ||||
458 | EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), | |||
459 | ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { | |||
460 | OS.EmitIntValue(Major, 4); | |||
461 | OS.EmitIntValue(Minor, 4); | |||
462 | }); | |||
463 | } | |||
464 | ||||
465 | void | |||
466 | AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, | |||
467 | uint32_t Minor, | |||
468 | uint32_t Stepping, | |||
469 | StringRef VendorName, | |||
470 | StringRef ArchName) { | |||
471 | uint16_t VendorNameSize = VendorName.size() + 1; | |||
472 | uint16_t ArchNameSize = ArchName.size() + 1; | |||
473 | ||||
474 | unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + | |||
475 | sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + | |||
476 | VendorNameSize + ArchNameSize; | |||
477 | ||||
478 | EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), | |||
479 | ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { | |||
480 | OS.EmitIntValue(VendorNameSize, 2); | |||
481 | OS.EmitIntValue(ArchNameSize, 2); | |||
482 | OS.EmitIntValue(Major, 4); | |||
483 | OS.EmitIntValue(Minor, 4); | |||
484 | OS.EmitIntValue(Stepping, 4); | |||
485 | OS.EmitBytes(VendorName); | |||
486 | OS.EmitIntValue(0, 1); // NULL terminate VendorName | |||
487 | OS.EmitBytes(ArchName); | |||
488 | OS.EmitIntValue(0, 1); // NULL terminte ArchName | |||
489 | }); | |||
490 | } | |||
491 | ||||
492 | void | |||
493 | AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { | |||
494 | ||||
495 | MCStreamer &OS = getStreamer(); | |||
496 | OS.PushSection(); | |||
497 | OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header))); | |||
498 | OS.PopSection(); | |||
499 | } | |||
500 | ||||
501 | void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, | |||
502 | unsigned Type) { | |||
503 | MCSymbolELF *Symbol = cast<MCSymbolELF>( | |||
504 | getStreamer().getContext().getOrCreateSymbol(SymbolName)); | |||
505 | Symbol->setType(Type); | |||
506 | } | |||
507 | ||||
508 | void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, | |||
509 | unsigned Align) { | |||
510 | assert(isPowerOf2_32(Align))((isPowerOf2_32(Align)) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(Align)", "/build/llvm-toolchain-snapshot-10~++20200106111110+0efc9e5a8cc/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp" , 510, __PRETTY_FUNCTION__)); | |||
511 | ||||
512 | MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol); | |||
513 | SymbolELF->setType(ELF::STT_OBJECT); | |||
514 | ||||
515 | if (!SymbolELF->isBindingSet()) { | |||
516 | SymbolELF->setBinding(ELF::STB_GLOBAL); | |||
517 | SymbolELF->setExternal(true); | |||
518 | } | |||
519 | ||||
520 | if (SymbolELF->declareCommon(Size, Align, true)) { | |||
521 | report_fatal_error("Symbol: " + Symbol->getName() + | |||
522 | " redeclared as different type"); | |||
523 | } | |||
524 | ||||
525 | SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS); | |||
526 | SymbolELF->setSize(MCConstantExpr::create(Size, getContext())); | |||
527 | } | |||
528 | ||||
529 | bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { | |||
530 | // Create two labels to mark the beginning and end of the desc field | |||
531 | // and a MCExpr to calculate the size of the desc field. | |||
532 | auto &Context = getContext(); | |||
533 | auto *DescBegin = Context.createTempSymbol(); | |||
534 | auto *DescEnd = Context.createTempSymbol(); | |||
535 | auto *DescSZ = MCBinaryExpr::createSub( | |||
536 | MCSymbolRefExpr::create(DescEnd, Context), | |||
537 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
538 | ||||
539 | EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, | |||
540 | [&](MCELFStreamer &OS) { | |||
541 | OS.EmitLabel(DescBegin); | |||
542 | OS.EmitBytes(IsaVersionString); | |||
543 | OS.EmitLabel(DescEnd); | |||
544 | }); | |||
545 | return true; | |||
546 | } | |||
547 | ||||
548 | bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc, | |||
549 | bool Strict) { | |||
550 | V3::MetadataVerifier Verifier(Strict); | |||
551 | if (!Verifier.verify(HSAMetadataDoc.getRoot())) | |||
552 | return false; | |||
553 | ||||
554 | std::string HSAMetadataString; | |||
555 | HSAMetadataDoc.writeToBlob(HSAMetadataString); | |||
556 | ||||
557 | // Create two labels to mark the beginning and end of the desc field | |||
558 | // and a MCExpr to calculate the size of the desc field. | |||
559 | auto &Context = getContext(); | |||
560 | auto *DescBegin = Context.createTempSymbol(); | |||
561 | auto *DescEnd = Context.createTempSymbol(); | |||
562 | auto *DescSZ = MCBinaryExpr::createSub( | |||
563 | MCSymbolRefExpr::create(DescEnd, Context), | |||
564 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
565 | ||||
566 | EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, | |||
567 | [&](MCELFStreamer &OS) { | |||
568 | OS.EmitLabel(DescBegin); | |||
569 | OS.EmitBytes(HSAMetadataString); | |||
570 | OS.EmitLabel(DescEnd); | |||
571 | }); | |||
572 | return true; | |||
573 | } | |||
574 | ||||
575 | bool AMDGPUTargetELFStreamer::EmitHSAMetadata( | |||
576 | const AMDGPU::HSAMD::Metadata &HSAMetadata) { | |||
577 | std::string HSAMetadataString; | |||
578 | if (HSAMD::toString(HSAMetadata, HSAMetadataString)) | |||
579 | return false; | |||
580 | ||||
581 | // Create two labels to mark the beginning and end of the desc field | |||
582 | // and a MCExpr to calculate the size of the desc field. | |||
583 | auto &Context = getContext(); | |||
584 | auto *DescBegin = Context.createTempSymbol(); | |||
585 | auto *DescEnd = Context.createTempSymbol(); | |||
586 | auto *DescSZ = MCBinaryExpr::createSub( | |||
587 | MCSymbolRefExpr::create(DescEnd, Context), | |||
588 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
589 | ||||
590 | EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, | |||
591 | [&](MCELFStreamer &OS) { | |||
592 | OS.EmitLabel(DescBegin); | |||
593 | OS.EmitBytes(HSAMetadataString); | |||
594 | OS.EmitLabel(DescEnd); | |||
595 | }); | |||
596 | return true; | |||
597 | } | |||
598 | ||||
599 | bool AMDGPUTargetELFStreamer::EmitCodeEnd() { | |||
600 | const uint32_t Encoded_s_code_end = 0xbf9f0000; | |||
601 | ||||
602 | MCStreamer &OS = getStreamer(); | |||
603 | OS.PushSection(); | |||
604 | OS.EmitValueToAlignment(64, Encoded_s_code_end, 4); | |||
605 | for (unsigned I = 0; I < 48; ++I) | |||
606 | OS.EmitIntValue(Encoded_s_code_end, 4); | |||
607 | OS.PopSection(); | |||
608 | return true; | |||
609 | } | |||
610 | ||||
611 | void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( | |||
612 | const MCSubtargetInfo &STI, StringRef KernelName, | |||
613 | const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, | |||
614 | uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, | |||
615 | bool ReserveXNACK) { | |||
616 | auto &Streamer = getStreamer(); | |||
617 | auto &Context = Streamer.getContext(); | |||
618 | ||||
619 | MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>( | |||
620 | Context.getOrCreateSymbol(Twine(KernelName))); | |||
621 | MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>( | |||
622 | Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd"))); | |||
623 | ||||
624 | // Copy kernel descriptor symbol's binding, other and visibility from the | |||
625 | // kernel code symbol. | |||
626 | KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding()); | |||
627 | KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther()); | |||
628 | KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility()); | |||
629 | // Kernel descriptor symbol's type and size are fixed. | |||
630 | KernelDescriptorSymbol->setType(ELF::STT_OBJECT); | |||
631 | KernelDescriptorSymbol->setSize( | |||
632 | MCConstantExpr::create(sizeof(KernelDescriptor), Context)); | |||
633 | ||||
634 | // The visibility of the kernel code symbol must be protected or less to allow | |||
635 | // static relocations from the kernel descriptor to be used. | |||
636 | if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT) | |||
637 | KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); | |||
638 | ||||
639 | Streamer.EmitLabel(KernelDescriptorSymbol); | |||
640 | Streamer.EmitBytes(StringRef( | |||
641 | (const char*)&(KernelDescriptor), | |||
642 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ))); | |||
643 | // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The | |||
644 | // expression being created is: | |||
645 | // (start of kernel code) - (start of kernel descriptor) | |||
646 | // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. | |||
647 | Streamer.EmitValue(MCBinaryExpr::createSub( | |||
648 | MCSymbolRefExpr::create( | |||
649 | KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context), | |||
650 | MCSymbolRefExpr::create( | |||
651 | KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), | |||
652 | Context), | |||
653 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); | |||
654 | Streamer.EmitBytes(StringRef( | |||
655 | (const char*)&(KernelDescriptor) + | |||
656 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ) + | |||
657 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset), | |||
658 | sizeof(KernelDescriptor) - | |||
659 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ) - | |||
660 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset))); | |||
661 | } |