Bug Summary

File:lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Warning:line 99, column 23
1st function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUTargetStreamer.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Target/AMDGPU/MCTargetDesc -I /build/llvm-toolchain-snapshot-9~svn362543/lib/Target/AMDGPU/MCTargetDesc -I /build/llvm-toolchain-snapshot-9~svn362543/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn362543/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Target/AMDGPU/MCTargetDesc -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn362543=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-06-05-060531-1271-1 -x c++ /build/llvm-toolchain-snapshot-9~svn362543/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp -faddrsig
1//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPUTargetStreamer.h"
14#include "AMDGPU.h"
15#include "SIDefines.h"
16#include "Utils/AMDGPUBaseInfo.h"
17#include "Utils/AMDKernelCodeTUtils.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
20#include "llvm/BinaryFormat/ELF.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/Metadata.h"
24#include "llvm/IR/Module.h"
25#include "llvm/MC/MCContext.h"
26#include "llvm/MC/MCELFStreamer.h"
27#include "llvm/MC/MCObjectFileInfo.h"
28#include "llvm/MC/MCSectionELF.h"
29#include "llvm/Support/FormattedStream.h"
30#include "llvm/Support/TargetParser.h"
31
32namespace llvm {
33#include "AMDGPUPTNote.h"
34}
35
36using namespace llvm;
37using namespace llvm::AMDGPU;
38using namespace llvm::AMDGPU::HSAMD;
39
40//===----------------------------------------------------------------------===//
41// AMDGPUTargetStreamer
42//===----------------------------------------------------------------------===//
43
44bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
45 HSAMD::Metadata HSAMetadata;
46 if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
47 return false;
48
49 return EmitHSAMetadata(HSAMetadata);
50}
51
52bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
53 msgpack::Document HSAMetadataDoc;
54 if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
55 return false;
56 return EmitHSAMetadata(HSAMetadataDoc, false);
57}
58
59StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
60 AMDGPU::GPUKind AK;
1
'AK' declared without an initial value
61
62 switch (ElfMach) {
2
'Default' branch taken. Execution continues on line 99
63 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
64 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
65 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
66 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
67 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
68 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
69 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
70 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
71 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
72 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
73 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
74 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
75 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
76 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
77 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
78 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
96 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
97 }
98
99 StringRef GPUName = getArchNameAMDGCN(AK);
3
1st function call argument is an uninitialized value
100 if (GPUName != "")
101 return GPUName;
102 return getArchNameR600(AK);
103}
104
105unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
106 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
107 if (AK == AMDGPU::GPUKind::GK_NONE)
108 AK = parseArchR600(GPU);
109
110 switch (AK) {
111 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
112 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
113 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
114 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
115 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
116 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
117 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
118 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
119 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
120 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
121 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
122 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
123 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
124 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
125 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
126 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
127 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
128 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
129 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
130 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
131 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
132 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
133 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
134 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
135 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
136 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
137 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
138 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
139 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
140 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
141 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
142 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
143 case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
144 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
145 }
146
147 llvm_unreachable("unknown GPU")::llvm::llvm_unreachable_internal("unknown GPU", "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp"
, 147)
;
148}
149
150//===----------------------------------------------------------------------===//
151// AMDGPUTargetAsmStreamer
152//===----------------------------------------------------------------------===//
153
154AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
155 formatted_raw_ostream &OS)
156 : AMDGPUTargetStreamer(S), OS(OS) { }
157
158// A hook for emitting stuff at the end.
159// We use it for emitting the accumulated PAL metadata as directives.
160void AMDGPUTargetAsmStreamer::finish() {
161 std::string S;
162 getPALMetadata()->toString(S);
163 OS << S;
164}
165
166void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
167 OS << "\t.amdgcn_target \"" << Target << "\"\n";
168}
169
170void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
171 uint32_t Major, uint32_t Minor) {
172 OS << "\t.hsa_code_object_version " <<
173 Twine(Major) << "," << Twine(Minor) << '\n';
174}
175
176void
177AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
178 uint32_t Minor,
179 uint32_t Stepping,
180 StringRef VendorName,
181 StringRef ArchName) {
182 OS << "\t.hsa_code_object_isa " <<
183 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
184 ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
185
186}
187
188void
189AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
190 OS << "\t.amd_kernel_code_t\n";
191 dumpAmdKernelCode(&Header, OS, "\t\t");
192 OS << "\t.end_amd_kernel_code_t\n";
193}
194
195void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
196 unsigned Type) {
197 switch (Type) {
198 default: llvm_unreachable("Invalid AMDGPU symbol type")::llvm::llvm_unreachable_internal("Invalid AMDGPU symbol type"
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp"
, 198)
;
199 case ELF::STT_AMDGPU_HSA_KERNEL:
200 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
201 break;
202 }
203}
204
205bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
206 OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
207 return true;
208}
209
210bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
211 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
212 std::string HSAMetadataString;
213 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
214 return false;
215
216 OS << '\t' << AssemblerDirectiveBegin << '\n';
217 OS << HSAMetadataString << '\n';
218 OS << '\t' << AssemblerDirectiveEnd << '\n';
219 return true;
220}
221
222bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
223 msgpack::Document &HSAMetadataDoc, bool Strict) {
224 V3::MetadataVerifier Verifier(Strict);
225 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
226 return false;
227
228 std::string HSAMetadataString;
229 raw_string_ostream StrOS(HSAMetadataString);
230 HSAMetadataDoc.toYAML(StrOS);
231
232 OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
233 OS << StrOS.str() << '\n';
234 OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
235 return true;
236}
237
238bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
239 const uint32_t Encoded_s_code_end = 0xbf9f0000;
240 OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
241 OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
242 return true;
243}
244
245void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
246 const MCSubtargetInfo &STI, StringRef KernelName,
247 const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
248 bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
249 IsaVersion IVersion = getIsaVersion(STI.getCPU());
250
251 OS << "\t.amdhsa_kernel " << KernelName << '\n';
252
253#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
254 STREAM << "\t\t" << DIRECTIVE << " " \
255 << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME)((KERNEL_DESC.MEMBER_NAME & FIELD_NAME) >> FIELD_NAME_SHIFT
)
<< '\n';
256
257 OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
258 << '\n';
259 OS << "\t\t.amdhsa_private_segment_fixed_size "
260 << KD.private_segment_fixed_size << '\n';
261
262 PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
263 kernel_code_properties,
264 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
265 PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
266 kernel_code_properties,
267 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
268 PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
269 kernel_code_properties,
270 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
271 PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
272 kernel_code_properties,
273 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
274 PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
275 kernel_code_properties,
276 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
277 PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
278 kernel_code_properties,
279 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
280 PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
281 kernel_code_properties,
282 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
283 PRINT_FIELD(
284 OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
285 compute_pgm_rsrc2,
286 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
287 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
288 compute_pgm_rsrc2,
289 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
290 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
291 compute_pgm_rsrc2,
292 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
293 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
294 compute_pgm_rsrc2,
295 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
296 PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
297 compute_pgm_rsrc2,
298 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
299 PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
300 compute_pgm_rsrc2,
301 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
302
303 // These directives are required.
304 OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
305 OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
306
307 if (!ReserveVCC)
308 OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
309 if (IVersion.Major >= 7 && !ReserveFlatScr)
310 OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
311 if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
312 OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
313
314 PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
315 compute_pgm_rsrc1,
316 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
317 PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
318 compute_pgm_rsrc1,
319 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
320 PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
321 compute_pgm_rsrc1,
322 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
323 PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
324 compute_pgm_rsrc1,
325 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
326 PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
327 compute_pgm_rsrc1,
328 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
329 PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
330 compute_pgm_rsrc1,
331 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
332 if (IVersion.Major >= 9)
333 PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
334 compute_pgm_rsrc1,
335 amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
336 if (IVersion.Major >= 10) {
337 PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
338 compute_pgm_rsrc1,
339 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
340 PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
341 compute_pgm_rsrc1,
342 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
343 PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
344 compute_pgm_rsrc1,
345 amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
346 }
347 PRINT_FIELD(
348 OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
349 compute_pgm_rsrc2,
350 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
351 PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
352 compute_pgm_rsrc2,
353 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
354 PRINT_FIELD(
355 OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
356 compute_pgm_rsrc2,
357 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
358 PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
359 compute_pgm_rsrc2,
360 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
361 PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
362 compute_pgm_rsrc2,
363 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
364 PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
365 compute_pgm_rsrc2,
366 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
367 PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
368 compute_pgm_rsrc2,
369 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
370#undef PRINT_FIELD
371
372 OS << "\t.end_amdhsa_kernel\n";
373}
374
375//===----------------------------------------------------------------------===//
376// AMDGPUTargetELFStreamer
377//===----------------------------------------------------------------------===//
378
379AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
380 MCStreamer &S, const MCSubtargetInfo &STI)
381 : AMDGPUTargetStreamer(S), Streamer(S) {
382 MCAssembler &MCA = getStreamer().getAssembler();
383 unsigned EFlags = MCA.getELFHeaderEFlags();
384
385 EFlags &= ~ELF::EF_AMDGPU_MACH;
386 EFlags |= getElfMach(STI.getCPU());
387
388 EFlags &= ~ELF::EF_AMDGPU_XNACK;
389 if (AMDGPU::hasXNACK(STI))
390 EFlags |= ELF::EF_AMDGPU_XNACK;
391
392 EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
393 if (AMDGPU::hasSRAMECC(STI))
394 EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
395
396 MCA.setELFHeaderEFlags(EFlags);
397}
398
399MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
400 return static_cast<MCELFStreamer &>(Streamer);
401}
402
403// A hook for emitting stuff at the end.
404// We use it for emitting the accumulated PAL metadata as a .note record.
405void AMDGPUTargetELFStreamer::finish() {
406 std::string Blob;
407 const char *Vendor = getPALMetadata()->getVendor();
408 unsigned Type = getPALMetadata()->getType();
409 getPALMetadata()->toBlob(Type, Blob);
410 if (Blob.empty())
411 return;
412 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
413 [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); });
414}
415
416void AMDGPUTargetELFStreamer::EmitNote(
417 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
418 function_ref<void(MCELFStreamer &)> EmitDesc) {
419 auto &S = getStreamer();
420 auto &Context = S.getContext();
421
422 auto NameSZ = Name.size() + 1;
423
424 S.PushSection();
425 S.SwitchSection(Context.getELFSection(
426 ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
427 S.EmitIntValue(NameSZ, 4); // namesz
428 S.EmitValue(DescSZ, 4); // descz
429 S.EmitIntValue(NoteType, 4); // type
430 S.EmitBytes(Name); // name
431 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
432 EmitDesc(S); // desc
433 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
434 S.PopSection();
435}
436
437void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
438
439void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
440 uint32_t Major, uint32_t Minor) {
441
442 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
443 ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
444 OS.EmitIntValue(Major, 4);
445 OS.EmitIntValue(Minor, 4);
446 });
447}
448
449void
450AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
451 uint32_t Minor,
452 uint32_t Stepping,
453 StringRef VendorName,
454 StringRef ArchName) {
455 uint16_t VendorNameSize = VendorName.size() + 1;
456 uint16_t ArchNameSize = ArchName.size() + 1;
457
458 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
459 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
460 VendorNameSize + ArchNameSize;
461
462 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
463 ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
464 OS.EmitIntValue(VendorNameSize, 2);
465 OS.EmitIntValue(ArchNameSize, 2);
466 OS.EmitIntValue(Major, 4);
467 OS.EmitIntValue(Minor, 4);
468 OS.EmitIntValue(Stepping, 4);
469 OS.EmitBytes(VendorName);
470 OS.EmitIntValue(0, 1); // NULL terminate VendorName
471 OS.EmitBytes(ArchName);
472 OS.EmitIntValue(0, 1); // NULL terminte ArchName
473 });
474}
475
476void
477AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
478
479 MCStreamer &OS = getStreamer();
480 OS.PushSection();
481 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
482 OS.PopSection();
483}
484
485void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
486 unsigned Type) {
487 MCSymbolELF *Symbol = cast<MCSymbolELF>(
488 getStreamer().getContext().getOrCreateSymbol(SymbolName));
489 Symbol->setType(Type);
490}
491
492bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
493 // Create two labels to mark the beginning and end of the desc field
494 // and a MCExpr to calculate the size of the desc field.
495 auto &Context = getContext();
496 auto *DescBegin = Context.createTempSymbol();
497 auto *DescEnd = Context.createTempSymbol();
498 auto *DescSZ = MCBinaryExpr::createSub(
499 MCSymbolRefExpr::create(DescEnd, Context),
500 MCSymbolRefExpr::create(DescBegin, Context), Context);
501
502 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
503 [&](MCELFStreamer &OS) {
504 OS.EmitLabel(DescBegin);
505 OS.EmitBytes(IsaVersionString);
506 OS.EmitLabel(DescEnd);
507 });
508 return true;
509}
510
511bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
512 bool Strict) {
513 V3::MetadataVerifier Verifier(Strict);
514 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
515 return false;
516
517 std::string HSAMetadataString;
518 HSAMetadataDoc.writeToBlob(HSAMetadataString);
519
520 // Create two labels to mark the beginning and end of the desc field
521 // and a MCExpr to calculate the size of the desc field.
522 auto &Context = getContext();
523 auto *DescBegin = Context.createTempSymbol();
524 auto *DescEnd = Context.createTempSymbol();
525 auto *DescSZ = MCBinaryExpr::createSub(
526 MCSymbolRefExpr::create(DescEnd, Context),
527 MCSymbolRefExpr::create(DescBegin, Context), Context);
528
529 EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
530 [&](MCELFStreamer &OS) {
531 OS.EmitLabel(DescBegin);
532 OS.EmitBytes(HSAMetadataString);
533 OS.EmitLabel(DescEnd);
534 });
535 return true;
536}
537
538bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
539 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
540 std::string HSAMetadataString;
541 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
542 return false;
543
544 // Create two labels to mark the beginning and end of the desc field
545 // and a MCExpr to calculate the size of the desc field.
546 auto &Context = getContext();
547 auto *DescBegin = Context.createTempSymbol();
548 auto *DescEnd = Context.createTempSymbol();
549 auto *DescSZ = MCBinaryExpr::createSub(
550 MCSymbolRefExpr::create(DescEnd, Context),
551 MCSymbolRefExpr::create(DescBegin, Context), Context);
552
553 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
554 [&](MCELFStreamer &OS) {
555 OS.EmitLabel(DescBegin);
556 OS.EmitBytes(HSAMetadataString);
557 OS.EmitLabel(DescEnd);
558 });
559 return true;
560}
561
562bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
563 const uint32_t Encoded_s_code_end = 0xbf9f0000;
564
565 MCStreamer &OS = getStreamer();
566 OS.PushSection();
567 OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
568 for (unsigned I = 0; I < 32; ++I)
569 OS.EmitIntValue(Encoded_s_code_end, 4);
570 OS.PopSection();
571 return true;
572}
573
574void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
575 const MCSubtargetInfo &STI, StringRef KernelName,
576 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
577 uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
578 bool ReserveXNACK) {
579 auto &Streamer = getStreamer();
580 auto &Context = Streamer.getContext();
581
582 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
583 Context.getOrCreateSymbol(Twine(KernelName)));
584 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
585 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
586
587 // Copy kernel descriptor symbol's binding, other and visibility from the
588 // kernel code symbol.
589 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
590 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
591 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
592 // Kernel descriptor symbol's type and size are fixed.
593 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
594 KernelDescriptorSymbol->setSize(
595 MCConstantExpr::create(sizeof(KernelDescriptor), Context));
596
597 // The visibility of the kernel code symbol must be protected or less to allow
598 // static relocations from the kernel descriptor to be used.
599 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
600 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
601
602 Streamer.EmitLabel(KernelDescriptorSymbol);
603 Streamer.EmitBytes(StringRef(
604 (const char*)&(KernelDescriptor),
605 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset
)
));
606 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
607 // expression being created is:
608 // (start of kernel code) - (start of kernel descriptor)
609 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
610 Streamer.EmitValue(MCBinaryExpr::createSub(
611 MCSymbolRefExpr::create(
612 KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
613 MCSymbolRefExpr::create(
614 KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
615 Context),
616 sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
617 Streamer.EmitBytes(StringRef(
618 (const char*)&(KernelDescriptor) +
619 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset
)
+
620 sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
621 sizeof(KernelDescriptor) -
622 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)__builtin_offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset
)
-
623 sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
624}