File: | lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp |
Warning: | line 102, column 23 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file provides AMDGPU specific target streamer methods. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "AMDGPUTargetStreamer.h" | |||
15 | #include "AMDGPU.h" | |||
16 | #include "SIDefines.h" | |||
17 | #include "Utils/AMDGPUBaseInfo.h" | |||
18 | #include "Utils/AMDKernelCodeTUtils.h" | |||
19 | #include "llvm/ADT/Twine.h" | |||
20 | #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" | |||
21 | #include "llvm/BinaryFormat/ELF.h" | |||
22 | #include "llvm/BinaryFormat/MsgPackTypes.h" | |||
23 | #include "llvm/IR/Constants.h" | |||
24 | #include "llvm/IR/Function.h" | |||
25 | #include "llvm/IR/Metadata.h" | |||
26 | #include "llvm/IR/Module.h" | |||
27 | #include "llvm/MC/MCContext.h" | |||
28 | #include "llvm/MC/MCELFStreamer.h" | |||
29 | #include "llvm/MC/MCObjectFileInfo.h" | |||
30 | #include "llvm/MC/MCSectionELF.h" | |||
31 | #include "llvm/Support/FormattedStream.h" | |||
32 | #include "llvm/Support/TargetParser.h" | |||
33 | ||||
34 | namespace llvm { | |||
35 | #include "AMDGPUPTNote.h" | |||
36 | } | |||
37 | ||||
38 | using namespace llvm; | |||
39 | using namespace llvm::AMDGPU; | |||
40 | using namespace llvm::AMDGPU::HSAMD; | |||
41 | ||||
42 | //===----------------------------------------------------------------------===// | |||
43 | // AMDGPUTargetStreamer | |||
44 | //===----------------------------------------------------------------------===// | |||
45 | ||||
46 | bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { | |||
47 | HSAMD::Metadata HSAMetadata; | |||
48 | if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) | |||
49 | return false; | |||
50 | ||||
51 | return EmitHSAMetadata(HSAMetadata); | |||
52 | } | |||
53 | ||||
54 | bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { | |||
55 | std::shared_ptr<msgpack::Node> HSAMetadataRoot; | |||
56 | yaml::Input YIn(HSAMetadataString); | |||
57 | YIn >> HSAMetadataRoot; | |||
58 | if (YIn.error()) | |||
59 | return false; | |||
60 | return EmitHSAMetadata(HSAMetadataRoot, false); | |||
61 | } | |||
62 | ||||
63 | StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { | |||
64 | AMDGPU::GPUKind AK; | |||
| ||||
65 | ||||
66 | switch (ElfMach) { | |||
67 | case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; | |||
68 | case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; | |||
69 | case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; | |||
70 | case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; | |||
71 | case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; | |||
72 | case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; | |||
73 | case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; | |||
74 | case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; | |||
75 | case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; | |||
76 | case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; | |||
77 | case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; | |||
78 | case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; | |||
79 | case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; | |||
80 | case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; | |||
81 | case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; | |||
82 | case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; | |||
83 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; | |||
84 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; | |||
85 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; | |||
86 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; | |||
87 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; | |||
88 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; | |||
89 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; | |||
90 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; | |||
91 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; | |||
92 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; | |||
93 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; | |||
94 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; | |||
95 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; | |||
96 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; | |||
97 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; | |||
98 | case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; | |||
99 | case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; | |||
100 | } | |||
101 | ||||
102 | StringRef GPUName = getArchNameAMDGCN(AK); | |||
| ||||
103 | if (GPUName != "") | |||
104 | return GPUName; | |||
105 | return getArchNameR600(AK); | |||
106 | } | |||
107 | ||||
108 | unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { | |||
109 | AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); | |||
110 | if (AK == AMDGPU::GPUKind::GK_NONE) | |||
111 | AK = parseArchR600(GPU); | |||
112 | ||||
113 | switch (AK) { | |||
114 | case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; | |||
115 | case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; | |||
116 | case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; | |||
117 | case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; | |||
118 | case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; | |||
119 | case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; | |||
120 | case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; | |||
121 | case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; | |||
122 | case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; | |||
123 | case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; | |||
124 | case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; | |||
125 | case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; | |||
126 | case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; | |||
127 | case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; | |||
128 | case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; | |||
129 | case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; | |||
130 | case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; | |||
131 | case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; | |||
132 | case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; | |||
133 | case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; | |||
134 | case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; | |||
135 | case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; | |||
136 | case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; | |||
137 | case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; | |||
138 | case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; | |||
139 | case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; | |||
140 | case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; | |||
141 | case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; | |||
142 | case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; | |||
143 | case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; | |||
144 | case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; | |||
145 | case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; | |||
146 | case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; | |||
147 | } | |||
148 | ||||
149 | llvm_unreachable("unknown GPU")::llvm::llvm_unreachable_internal("unknown GPU", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp" , 149); | |||
150 | } | |||
151 | ||||
152 | //===----------------------------------------------------------------------===// | |||
153 | // AMDGPUTargetAsmStreamer | |||
154 | //===----------------------------------------------------------------------===// | |||
155 | ||||
156 | AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, | |||
157 | formatted_raw_ostream &OS) | |||
158 | : AMDGPUTargetStreamer(S), OS(OS) { } | |||
159 | ||||
160 | void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) { | |||
161 | OS << "\t.amdgcn_target \"" << Target << "\"\n"; | |||
162 | } | |||
163 | ||||
164 | void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion( | |||
165 | uint32_t Major, uint32_t Minor) { | |||
166 | OS << "\t.hsa_code_object_version " << | |||
167 | Twine(Major) << "," << Twine(Minor) << '\n'; | |||
168 | } | |||
169 | ||||
170 | void | |||
171 | AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, | |||
172 | uint32_t Minor, | |||
173 | uint32_t Stepping, | |||
174 | StringRef VendorName, | |||
175 | StringRef ArchName) { | |||
176 | OS << "\t.hsa_code_object_isa " << | |||
177 | Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) << | |||
178 | ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; | |||
179 | ||||
180 | } | |||
181 | ||||
182 | void | |||
183 | AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { | |||
184 | OS << "\t.amd_kernel_code_t\n"; | |||
185 | dumpAmdKernelCode(&Header, OS, "\t\t"); | |||
186 | OS << "\t.end_amd_kernel_code_t\n"; | |||
187 | } | |||
188 | ||||
189 | void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, | |||
190 | unsigned Type) { | |||
191 | switch (Type) { | |||
192 | default: llvm_unreachable("Invalid AMDGPU symbol type")::llvm::llvm_unreachable_internal("Invalid AMDGPU symbol type" , "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp" , 192); | |||
193 | case ELF::STT_AMDGPU_HSA_KERNEL: | |||
194 | OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; | |||
195 | break; | |||
196 | } | |||
197 | } | |||
198 | ||||
199 | bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { | |||
200 | OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n"; | |||
201 | return true; | |||
202 | } | |||
203 | ||||
204 | bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( | |||
205 | const AMDGPU::HSAMD::Metadata &HSAMetadata) { | |||
206 | std::string HSAMetadataString; | |||
207 | if (HSAMD::toString(HSAMetadata, HSAMetadataString)) | |||
208 | return false; | |||
209 | ||||
210 | OS << '\t' << AssemblerDirectiveBegin << '\n'; | |||
211 | OS << HSAMetadataString << '\n'; | |||
212 | OS << '\t' << AssemblerDirectiveEnd << '\n'; | |||
213 | return true; | |||
214 | } | |||
215 | ||||
216 | bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( | |||
217 | std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) { | |||
218 | V3::MetadataVerifier Verifier(Strict); | |||
219 | if (!Verifier.verify(*HSAMetadataRoot)) | |||
220 | return false; | |||
221 | ||||
222 | std::string HSAMetadataString; | |||
223 | raw_string_ostream StrOS(HSAMetadataString); | |||
224 | yaml::Output YOut(StrOS); | |||
225 | YOut << HSAMetadataRoot; | |||
226 | ||||
227 | OS << '\t' << V3::AssemblerDirectiveBegin << '\n'; | |||
228 | OS << StrOS.str() << '\n'; | |||
229 | OS << '\t' << V3::AssemblerDirectiveEnd << '\n'; | |||
230 | return true; | |||
231 | } | |||
232 | ||||
233 | bool AMDGPUTargetAsmStreamer::EmitPALMetadata( | |||
234 | const PALMD::Metadata &PALMetadata) { | |||
235 | std::string PALMetadataString; | |||
236 | if (PALMD::toString(PALMetadata, PALMetadataString)) | |||
237 | return false; | |||
238 | ||||
239 | OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n'; | |||
240 | return true; | |||
241 | } | |||
242 | ||||
243 | void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( | |||
244 | const MCSubtargetInfo &STI, StringRef KernelName, | |||
245 | const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, | |||
246 | bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { | |||
247 | IsaVersion IVersion = getIsaVersion(STI.getCPU()); | |||
248 | ||||
249 | OS << "\t.amdhsa_kernel " << KernelName << '\n'; | |||
250 | ||||
251 | #define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \ | |||
252 | STREAM << "\t\t" << DIRECTIVE << " " \ | |||
253 | << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME)((KERNEL_DESC.MEMBER_NAME & FIELD_NAME) >> FIELD_NAME_SHIFT ) << '\n'; | |||
254 | ||||
255 | OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size | |||
256 | << '\n'; | |||
257 | OS << "\t\t.amdhsa_private_segment_fixed_size " | |||
258 | << KD.private_segment_fixed_size << '\n'; | |||
259 | ||||
260 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, | |||
261 | kernel_code_properties, | |||
262 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); | |||
263 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, | |||
264 | kernel_code_properties, | |||
265 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); | |||
266 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD, | |||
267 | kernel_code_properties, | |||
268 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); | |||
269 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, | |||
270 | kernel_code_properties, | |||
271 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); | |||
272 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD, | |||
273 | kernel_code_properties, | |||
274 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); | |||
275 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, | |||
276 | kernel_code_properties, | |||
277 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); | |||
278 | PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, | |||
279 | kernel_code_properties, | |||
280 | amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); | |||
281 | PRINT_FIELD( | |||
282 | OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, | |||
283 | compute_pgm_rsrc2, | |||
284 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); | |||
285 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, | |||
286 | compute_pgm_rsrc2, | |||
287 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); | |||
288 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, | |||
289 | compute_pgm_rsrc2, | |||
290 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); | |||
291 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, | |||
292 | compute_pgm_rsrc2, | |||
293 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); | |||
294 | PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD, | |||
295 | compute_pgm_rsrc2, | |||
296 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); | |||
297 | PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD, | |||
298 | compute_pgm_rsrc2, | |||
299 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); | |||
300 | ||||
301 | // These directives are required. | |||
302 | OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n'; | |||
303 | OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n'; | |||
304 | ||||
305 | if (!ReserveVCC) | |||
306 | OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; | |||
307 | if (IVersion.Major >= 7 && !ReserveFlatScr) | |||
308 | OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; | |||
309 | if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI)) | |||
310 | OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n'; | |||
311 | ||||
312 | PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, | |||
313 | compute_pgm_rsrc1, | |||
314 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); | |||
315 | PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD, | |||
316 | compute_pgm_rsrc1, | |||
317 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); | |||
318 | PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD, | |||
319 | compute_pgm_rsrc1, | |||
320 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); | |||
321 | PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD, | |||
322 | compute_pgm_rsrc1, | |||
323 | amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); | |||
324 | PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, | |||
325 | compute_pgm_rsrc1, | |||
326 | amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); | |||
327 | PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, | |||
328 | compute_pgm_rsrc1, | |||
329 | amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); | |||
330 | if (IVersion.Major >= 9) | |||
331 | PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, | |||
332 | compute_pgm_rsrc1, | |||
333 | amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL); | |||
334 | PRINT_FIELD( | |||
335 | OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, | |||
336 | compute_pgm_rsrc2, | |||
337 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); | |||
338 | PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD, | |||
339 | compute_pgm_rsrc2, | |||
340 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); | |||
341 | PRINT_FIELD( | |||
342 | OS, ".amdhsa_exception_fp_ieee_div_zero", KD, | |||
343 | compute_pgm_rsrc2, | |||
344 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); | |||
345 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD, | |||
346 | compute_pgm_rsrc2, | |||
347 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); | |||
348 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD, | |||
349 | compute_pgm_rsrc2, | |||
350 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); | |||
351 | PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD, | |||
352 | compute_pgm_rsrc2, | |||
353 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); | |||
354 | PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD, | |||
355 | compute_pgm_rsrc2, | |||
356 | amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); | |||
357 | #undef PRINT_FIELD | |||
358 | ||||
359 | OS << "\t.end_amdhsa_kernel\n"; | |||
360 | } | |||
361 | ||||
362 | //===----------------------------------------------------------------------===// | |||
363 | // AMDGPUTargetELFStreamer | |||
364 | //===----------------------------------------------------------------------===// | |||
365 | ||||
366 | AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( | |||
367 | MCStreamer &S, const MCSubtargetInfo &STI) | |||
368 | : AMDGPUTargetStreamer(S), Streamer(S) { | |||
369 | MCAssembler &MCA = getStreamer().getAssembler(); | |||
370 | unsigned EFlags = MCA.getELFHeaderEFlags(); | |||
371 | ||||
372 | EFlags &= ~ELF::EF_AMDGPU_MACH; | |||
373 | EFlags |= getElfMach(STI.getCPU()); | |||
374 | ||||
375 | EFlags &= ~ELF::EF_AMDGPU_XNACK; | |||
376 | if (AMDGPU::hasXNACK(STI)) | |||
377 | EFlags |= ELF::EF_AMDGPU_XNACK; | |||
378 | ||||
379 | EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC; | |||
380 | if (AMDGPU::hasSRAMECC(STI)) | |||
381 | EFlags |= ELF::EF_AMDGPU_SRAM_ECC; | |||
382 | ||||
383 | MCA.setELFHeaderEFlags(EFlags); | |||
384 | } | |||
385 | ||||
386 | MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { | |||
387 | return static_cast<MCELFStreamer &>(Streamer); | |||
388 | } | |||
389 | ||||
390 | void AMDGPUTargetELFStreamer::EmitNote( | |||
391 | StringRef Name, const MCExpr *DescSZ, unsigned NoteType, | |||
392 | function_ref<void(MCELFStreamer &)> EmitDesc) { | |||
393 | auto &S = getStreamer(); | |||
394 | auto &Context = S.getContext(); | |||
395 | ||||
396 | auto NameSZ = Name.size() + 1; | |||
397 | ||||
398 | S.PushSection(); | |||
399 | S.SwitchSection(Context.getELFSection( | |||
400 | ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); | |||
401 | S.EmitIntValue(NameSZ, 4); // namesz | |||
402 | S.EmitValue(DescSZ, 4); // descz | |||
403 | S.EmitIntValue(NoteType, 4); // type | |||
404 | S.EmitBytes(Name); // name | |||
405 | S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 | |||
406 | EmitDesc(S); // desc | |||
407 | S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 | |||
408 | S.PopSection(); | |||
409 | } | |||
410 | ||||
411 | void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {} | |||
412 | ||||
413 | void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( | |||
414 | uint32_t Major, uint32_t Minor) { | |||
415 | ||||
416 | EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), | |||
417 | ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { | |||
418 | OS.EmitIntValue(Major, 4); | |||
419 | OS.EmitIntValue(Minor, 4); | |||
420 | }); | |||
421 | } | |||
422 | ||||
423 | void | |||
424 | AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, | |||
425 | uint32_t Minor, | |||
426 | uint32_t Stepping, | |||
427 | StringRef VendorName, | |||
428 | StringRef ArchName) { | |||
429 | uint16_t VendorNameSize = VendorName.size() + 1; | |||
430 | uint16_t ArchNameSize = ArchName.size() + 1; | |||
431 | ||||
432 | unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + | |||
433 | sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + | |||
434 | VendorNameSize + ArchNameSize; | |||
435 | ||||
436 | EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), | |||
437 | ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { | |||
438 | OS.EmitIntValue(VendorNameSize, 2); | |||
439 | OS.EmitIntValue(ArchNameSize, 2); | |||
440 | OS.EmitIntValue(Major, 4); | |||
441 | OS.EmitIntValue(Minor, 4); | |||
442 | OS.EmitIntValue(Stepping, 4); | |||
443 | OS.EmitBytes(VendorName); | |||
444 | OS.EmitIntValue(0, 1); // NULL terminate VendorName | |||
445 | OS.EmitBytes(ArchName); | |||
446 | OS.EmitIntValue(0, 1); // NULL terminte ArchName | |||
447 | }); | |||
448 | } | |||
449 | ||||
450 | void | |||
451 | AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { | |||
452 | ||||
453 | MCStreamer &OS = getStreamer(); | |||
454 | OS.PushSection(); | |||
455 | OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header))); | |||
456 | OS.PopSection(); | |||
457 | } | |||
458 | ||||
459 | void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, | |||
460 | unsigned Type) { | |||
461 | MCSymbolELF *Symbol = cast<MCSymbolELF>( | |||
462 | getStreamer().getContext().getOrCreateSymbol(SymbolName)); | |||
463 | Symbol->setType(Type); | |||
464 | } | |||
465 | ||||
466 | bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { | |||
467 | // Create two labels to mark the beginning and end of the desc field | |||
468 | // and a MCExpr to calculate the size of the desc field. | |||
469 | auto &Context = getContext(); | |||
470 | auto *DescBegin = Context.createTempSymbol(); | |||
471 | auto *DescEnd = Context.createTempSymbol(); | |||
472 | auto *DescSZ = MCBinaryExpr::createSub( | |||
473 | MCSymbolRefExpr::create(DescEnd, Context), | |||
474 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
475 | ||||
476 | EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, | |||
477 | [&](MCELFStreamer &OS) { | |||
478 | OS.EmitLabel(DescBegin); | |||
479 | OS.EmitBytes(IsaVersionString); | |||
480 | OS.EmitLabel(DescEnd); | |||
481 | }); | |||
482 | return true; | |||
483 | } | |||
484 | ||||
485 | bool AMDGPUTargetELFStreamer::EmitHSAMetadata( | |||
486 | std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) { | |||
487 | V3::MetadataVerifier Verifier(Strict); | |||
488 | if (!Verifier.verify(*HSAMetadataRoot)) | |||
489 | return false; | |||
490 | ||||
491 | std::string HSAMetadataString; | |||
492 | raw_string_ostream StrOS(HSAMetadataString); | |||
493 | msgpack::Writer MPWriter(StrOS); | |||
494 | HSAMetadataRoot->write(MPWriter); | |||
495 | ||||
496 | // Create two labels to mark the beginning and end of the desc field | |||
497 | // and a MCExpr to calculate the size of the desc field. | |||
498 | auto &Context = getContext(); | |||
499 | auto *DescBegin = Context.createTempSymbol(); | |||
500 | auto *DescEnd = Context.createTempSymbol(); | |||
501 | auto *DescSZ = MCBinaryExpr::createSub( | |||
502 | MCSymbolRefExpr::create(DescEnd, Context), | |||
503 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
504 | ||||
505 | EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, | |||
506 | [&](MCELFStreamer &OS) { | |||
507 | OS.EmitLabel(DescBegin); | |||
508 | OS.EmitBytes(StrOS.str()); | |||
509 | OS.EmitLabel(DescEnd); | |||
510 | }); | |||
511 | return true; | |||
512 | } | |||
513 | ||||
514 | bool AMDGPUTargetELFStreamer::EmitHSAMetadata( | |||
515 | const AMDGPU::HSAMD::Metadata &HSAMetadata) { | |||
516 | std::string HSAMetadataString; | |||
517 | if (HSAMD::toString(HSAMetadata, HSAMetadataString)) | |||
518 | return false; | |||
519 | ||||
520 | // Create two labels to mark the beginning and end of the desc field | |||
521 | // and a MCExpr to calculate the size of the desc field. | |||
522 | auto &Context = getContext(); | |||
523 | auto *DescBegin = Context.createTempSymbol(); | |||
524 | auto *DescEnd = Context.createTempSymbol(); | |||
525 | auto *DescSZ = MCBinaryExpr::createSub( | |||
526 | MCSymbolRefExpr::create(DescEnd, Context), | |||
527 | MCSymbolRefExpr::create(DescBegin, Context), Context); | |||
528 | ||||
529 | EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, | |||
530 | [&](MCELFStreamer &OS) { | |||
531 | OS.EmitLabel(DescBegin); | |||
532 | OS.EmitBytes(HSAMetadataString); | |||
533 | OS.EmitLabel(DescEnd); | |||
534 | }); | |||
535 | return true; | |||
536 | } | |||
537 | ||||
538 | bool AMDGPUTargetELFStreamer::EmitPALMetadata( | |||
539 | const PALMD::Metadata &PALMetadata) { | |||
540 | EmitNote(ElfNote::NoteNameV2, | |||
541 | MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), | |||
542 | getContext()), | |||
543 | ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) { | |||
544 | for (auto I : PALMetadata) | |||
545 | OS.EmitIntValue(I, sizeof(uint32_t)); | |||
546 | }); | |||
547 | return true; | |||
548 | } | |||
549 | ||||
550 | void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( | |||
551 | const MCSubtargetInfo &STI, StringRef KernelName, | |||
552 | const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, | |||
553 | uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, | |||
554 | bool ReserveXNACK) { | |||
555 | auto &Streamer = getStreamer(); | |||
556 | auto &Context = Streamer.getContext(); | |||
557 | ||||
558 | MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>( | |||
559 | Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd"))); | |||
560 | KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL); | |||
561 | KernelDescriptorSymbol->setType(ELF::STT_OBJECT); | |||
562 | KernelDescriptorSymbol->setSize( | |||
563 | MCConstantExpr::create(sizeof(KernelDescriptor), Context)); | |||
564 | ||||
565 | MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>( | |||
566 | Context.getOrCreateSymbol(Twine(KernelName))); | |||
567 | KernelCodeSymbol->setBinding(ELF::STB_LOCAL); | |||
568 | ||||
569 | Streamer.EmitLabel(KernelDescriptorSymbol); | |||
570 | Streamer.EmitBytes(StringRef( | |||
571 | (const char*)&(KernelDescriptor), | |||
572 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ))); | |||
573 | // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The | |||
574 | // expression being created is: | |||
575 | // (start of kernel code) - (start of kernel descriptor) | |||
576 | // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. | |||
577 | Streamer.EmitValue(MCBinaryExpr::createSub( | |||
578 | MCSymbolRefExpr::create( | |||
579 | KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context), | |||
580 | MCSymbolRefExpr::create( | |||
581 | KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), | |||
582 | Context), | |||
583 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); | |||
584 | Streamer.EmitBytes(StringRef( | |||
585 | (const char*)&(KernelDescriptor) + | |||
586 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ) + | |||
587 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset), | |||
588 | sizeof(KernelDescriptor) - | |||
589 | offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset ) - | |||
590 | sizeof(KernelDescriptor.kernel_code_entry_byte_offset))); | |||
591 | } |