45 cl::desc(
"Force a specific generic_v<N> flag to be "
46 "added. For testing purposes only."),
51 if (!HSAMetadataDoc.
fromYAML(HSAMetadataString))
61 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600;
break;
62 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630;
break;
63 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880;
break;
64 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670;
break;
65 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710;
break;
66 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730;
break;
67 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770;
break;
68 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR;
break;
69 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS;
break;
70 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER;
break;
71 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD;
break;
72 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO;
break;
73 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS;
break;
74 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS;
break;
75 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN;
break;
76 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS;
break;
77 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600;
break;
78 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601;
break;
79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602;
break;
80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700;
break;
81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701;
break;
82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702;
break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703;
break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704;
break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705;
break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801;
break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802;
break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803;
break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805;
break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810;
break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900;
break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902;
break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904;
break;
94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906;
break;
95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908;
break;
96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909;
break;
97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A;
break;
98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C;
break;
99 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942;
break;
100 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950;
break;
101 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010;
break;
102 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011;
break;
103 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012;
break;
104 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013;
break;
105 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030;
break;
106 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031;
break;
107 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032;
break;
108 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033;
break;
109 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034;
break;
110 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035;
break;
111 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036;
break;
112 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100;
break;
113 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101;
break;
114 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102;
break;
115 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103;
break;
116 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150;
break;
117 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151;
break;
118 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152;
break;
119 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153;
break;
120 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1154: AK = GK_GFX1154;
break;
121 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170: AK = GK_GFX1170;
break;
122 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1171: AK = GK_GFX1171;
break;
123 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1172: AK = GK_GFX1172;
break;
124 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200;
break;
125 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201;
break;
126 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250: AK = GK_GFX1250;
break;
127 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251: AK = GK_GFX1251;
break;
128 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310: AK = GK_GFX1310;
break;
129 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC;
break;
130 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC;
break;
131 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC;
break;
132 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC;
break;
133 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC;
break;
134 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_7_GENERIC: AK = GK_GFX11_7_GENERIC;
break;
135 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC;
break;
136 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC: AK = GK_GFX12_5_GENERIC;
break;
137 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX13_GENERIC: AK = GK_GFX13_GENERIC;
break;
156 case GK_R600:
return ELF::EF_AMDGPU_MACH_R600_R600;
157 case GK_R630:
return ELF::EF_AMDGPU_MACH_R600_R630;
158 case GK_RS880:
return ELF::EF_AMDGPU_MACH_R600_RS880;
159 case GK_RV670:
return ELF::EF_AMDGPU_MACH_R600_RV670;
160 case GK_RV710:
return ELF::EF_AMDGPU_MACH_R600_RV710;
161 case GK_RV730:
return ELF::EF_AMDGPU_MACH_R600_RV730;
162 case GK_RV770:
return ELF::EF_AMDGPU_MACH_R600_RV770;
163 case GK_CEDAR:
return ELF::EF_AMDGPU_MACH_R600_CEDAR;
164 case GK_CYPRESS:
return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
165 case GK_JUNIPER:
return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
166 case GK_REDWOOD:
return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
167 case GK_SUMO:
return ELF::EF_AMDGPU_MACH_R600_SUMO;
168 case GK_BARTS:
return ELF::EF_AMDGPU_MACH_R600_BARTS;
169 case GK_CAICOS:
return ELF::EF_AMDGPU_MACH_R600_CAICOS;
170 case GK_CAYMAN:
return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
171 case GK_TURKS:
return ELF::EF_AMDGPU_MACH_R600_TURKS;
172 case GK_GFX600:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
173 case GK_GFX601:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
174 case GK_GFX602:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
175 case GK_GFX700:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
176 case GK_GFX701:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
177 case GK_GFX702:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
178 case GK_GFX703:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
179 case GK_GFX704:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
180 case GK_GFX705:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
181 case GK_GFX801:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
182 case GK_GFX802:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
183 case GK_GFX803:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
184 case GK_GFX805:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
185 case GK_GFX810:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
186 case GK_GFX900:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
187 case GK_GFX902:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
188 case GK_GFX904:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
189 case GK_GFX906:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
190 case GK_GFX908:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
191 case GK_GFX909:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
192 case GK_GFX90A:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
193 case GK_GFX90C:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
194 case GK_GFX942:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
195 case GK_GFX950:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
196 case GK_GFX1010:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
197 case GK_GFX1011:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
198 case GK_GFX1012:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
199 case GK_GFX1013:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
200 case GK_GFX1030:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
201 case GK_GFX1031:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
202 case GK_GFX1032:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
203 case GK_GFX1033:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
204 case GK_GFX1034:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
205 case GK_GFX1035:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
206 case GK_GFX1036:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
207 case GK_GFX1100:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
208 case GK_GFX1101:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
209 case GK_GFX1102:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
210 case GK_GFX1103:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
211 case GK_GFX1150:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
212 case GK_GFX1151:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
213 case GK_GFX1152:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
214 case GK_GFX1153:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153;
215 case GK_GFX1154:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1154;
216 case GK_GFX1170:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170;
217 case GK_GFX1171:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1171;
218 case GK_GFX1172:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1172;
219 case GK_GFX1200:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
220 case GK_GFX1201:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
221 case GK_GFX1250:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250;
222 case GK_GFX1251:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251;
223 case GK_GFX1310:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310;
224 case GK_GFX9_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
225 case GK_GFX9_4_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC;
226 case GK_GFX10_1_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
227 case GK_GFX10_3_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
228 case GK_GFX11_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
229 case GK_GFX11_7_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_7_GENERIC;
230 case GK_GFX12_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
231 case GK_GFX12_5_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC;
232 case GK_GFX13_GENERIC:
return ELF::EF_AMDGPU_MACH_AMDGCN_GFX13_GENERIC;
262 OS <<
"\t.amdgcn_target \"" << *
getTargetID() <<
"\"\n";
268 OS <<
"\t.amdhsa_code_object_version " << COV <<
'\n';
277 OS <<
"\t.amd_kernel_code_t\n";
278 Header.EmitKernelCodeT(OS,
getContext(), FoldAndPrint);
279 OS <<
"\t.end_amd_kernel_code_t\n";
288 OS <<
"\t.amdgpu_hsa_kernel " << SymbolName <<
'\n';
295 OS <<
"\t.amdgpu_lds " << Symbol->getName() <<
", " <<
Size <<
", "
296 << Alignment.
value() <<
'\n';
305#define PRINT_RES_INFO(ARG) \
307 ARG->print(OS, &getContext().getAsmInfo()); \
309 getContext().getAsmInfo().printExpr(OS, *ARG->getVariableValue()); \
310 Streamer.addBlankLine();
328#define PRINT_RES_INFO(ARG) \
330 ARG->print(OS, &getContext().getAsmInfo()); \
332 getContext().getAsmInfo().printExpr(OS, *ARG->getVariableValue()); \
333 Streamer.addBlankLine();
343 OS <<
"\t.amd_amdgpu_isa \"" <<
getTargetID() <<
"\"\n";
350 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
353 std::string HSAMetadataString;
355 HSAMetadataDoc.
toYAML(StrOS);
358 OS << StrOS.
str() <<
'\n';
364 const uint32_t Encoded_s_code_end = 0xbf9f0000;
365 const uint32_t Encoded_s_nop = 0xbf800000;
366 uint32_t Encoded_pad = Encoded_s_code_end;
376 Encoded_pad = Encoded_s_nop;
380 OS <<
"\t.p2alignl " << Log2CacheLineSize <<
", " << Encoded_pad <<
'\n';
381 OS <<
"\t.fill " << (FillSize / 4) <<
", 4, " << Encoded_pad <<
'\n';
389 const MCExpr *ReserveFlatScr) {
393 OS <<
"\t.amdhsa_kernel " << KernelName <<
'\n';
398 const MCExpr *ShiftedAndMaskedExpr =
410 OS <<
"\t\t.amdhsa_group_segment_fixed_size ";
414 OS <<
"\t\t.amdhsa_private_segment_fixed_size ";
418 OS <<
"\t\t.amdhsa_kernarg_size ";
424 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
425 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
426 ".amdhsa_user_sgpr_count");
429 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
430 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
431 ".amdhsa_user_sgpr_count");
437 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
438 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
439 ".amdhsa_user_sgpr_private_segment_buffer");
441 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
442 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
443 ".amdhsa_user_sgpr_dispatch_ptr");
445 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
446 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
447 ".amdhsa_user_sgpr_queue_ptr");
449 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
450 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
451 ".amdhsa_user_sgpr_kernarg_segment_ptr");
453 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
454 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
455 ".amdhsa_user_sgpr_dispatch_id");
458 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
459 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
460 ".amdhsa_user_sgpr_flat_scratch_init");
463 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
464 ".amdhsa_user_sgpr_kernarg_preload_length");
466 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
467 ".amdhsa_user_sgpr_kernarg_preload_offset");
471 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
472 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
473 ".amdhsa_user_sgpr_private_segment_size");
474 if (IVersion.
Major >= 10)
476 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
477 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
478 ".amdhsa_wavefront_size32");
481 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
482 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
483 ".amdhsa_uses_dynamic_stack");
485 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
486 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
488 ?
".amdhsa_enable_private_segment"
489 :
".amdhsa_system_sgpr_private_segment_wavefront_offset"));
491 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
492 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
493 ".amdhsa_system_sgpr_workgroup_id_x");
495 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
496 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
497 ".amdhsa_system_sgpr_workgroup_id_y");
499 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
500 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
501 ".amdhsa_system_sgpr_workgroup_id_z");
503 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
504 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
505 ".amdhsa_system_sgpr_workgroup_info");
507 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
508 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
509 ".amdhsa_system_vgpr_workitem_id");
512 OS <<
"\t\t.amdhsa_next_free_vgpr ";
513 EmitMCExpr(NextVGPR);
516 OS <<
"\t\t.amdhsa_next_free_sgpr ";
517 EmitMCExpr(NextSGPR);
524 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
525 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
getContext());
530 OS <<
"\t\t.amdhsa_accum_offset ";
538 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
539 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
540 ".amdhsa_named_barrier_count");
542 OS <<
"\t\t.amdhsa_reserve_vcc ";
543 EmitMCExpr(ReserveVCC);
547 OS <<
"\t\t.amdhsa_reserve_flat_scratch ";
548 EmitMCExpr(ReserveFlatScr);
558 OS <<
"\t\t.amdhsa_reserve_xnack_mask " <<
getTargetID()->isXnackOnOrAny()
564 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
565 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
566 ".amdhsa_float_round_mode_32");
568 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
569 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
570 ".amdhsa_float_round_mode_16_64");
572 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
573 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
574 ".amdhsa_float_denorm_mode_32");
576 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
577 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
578 ".amdhsa_float_denorm_mode_16_64");
579 if (STI.
hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
581 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
582 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
583 ".amdhsa_dx10_clamp");
585 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
586 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
587 ".amdhsa_ieee_mode");
589 if (IVersion.
Major >= 9) {
591 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
592 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
593 ".amdhsa_fp16_overflow");
597 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
598 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
".amdhsa_tg_split");
601 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
602 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
603 ".amdhsa_workgroup_processor_mode");
604 if (IVersion.
Major >= 10) {
606 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
607 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
608 ".amdhsa_memory_ordered");
610 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
611 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
612 ".amdhsa_forward_progress");
614 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
616 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
617 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
618 ".amdhsa_shared_vgpr_count");
620 if (IVersion.
Major == 11) {
622 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
623 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
624 ".amdhsa_inst_pref_size");
626 if (IVersion.
Major >= 12) {
628 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
629 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
630 ".amdhsa_inst_pref_size");
632 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
633 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
634 ".amdhsa_round_robin_scheduling");
639 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
640 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
641 ".amdhsa_exception_fp_ieee_invalid_op");
644 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
645 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
646 ".amdhsa_exception_fp_denorm_src");
650 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
651 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
652 ".amdhsa_exception_fp_ieee_div_zero");
655 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
656 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
657 ".amdhsa_exception_fp_ieee_overflow");
660 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
661 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
662 ".amdhsa_exception_fp_ieee_underflow");
665 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
666 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
667 ".amdhsa_exception_fp_ieee_inexact");
670 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
671 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
672 ".amdhsa_exception_int_div_zero");
674 OS <<
"\t.end_amdhsa_kernel\n";
692 InfoScopeEmitter Emit) {
697 for (
const auto &[Func, Res] :
Data.Uses)
698 FuncUses[Func].push_back(Res);
699 for (
const auto &[Src, Dst] :
Data.Calls)
700 FuncCalls[Src].push_back(Dst);
701 for (
const auto &[Func, TypeId] :
Data.IndirectCalls)
702 FuncIndirectCalls[Func].push_back(TypeId);
703 for (
const auto &[Sym, TypeId] :
Data.TypeIds)
704 FuncTypeIds[Sym].push_back(TypeId);
708 if (!Emitted.insert(Sym).second)
712 if (
auto It = FuncUses.
find(Sym); It != FuncUses.
end())
714 if (
auto It = FuncCalls.
find(Sym); It != FuncCalls.
end())
716 if (
auto It = FuncIndirectCalls.
find(Sym); It != FuncIndirectCalls.
end())
717 IndirectCallTypeIds = It->second;
718 if (
auto It = FuncTypeIds.
find(Sym); It != FuncTypeIds.
end())
719 TypeIds = It->second;
720 Emit(Sym, Info,
Uses, Calls, IndirectCallTypeIds, TypeIds);
724 EmitIfNew(Func.Sym, &Func);
727 for (
const auto &[Sym, TypeId] :
Data.TypeIds)
728 EmitIfNew(Sym,
nullptr);
729 for (
const auto &[Sym, Res] :
Data.Uses)
730 EmitIfNew(Sym,
nullptr);
731 for (
const auto &[Sym, Dst] :
Data.Calls)
732 EmitIfNew(Sym,
nullptr);
733 for (
const auto &[Sym, TypeId] :
Data.IndirectCalls)
734 EmitIfNew(Sym,
nullptr);
745 OS <<
"\t.amdgpu_info " << Sym->
getName() <<
'\n';
750 if (Info->UsesFlatScratch)
752 if (Info->HasDynStack)
755 OS <<
"\t\t.amdgpu_num_sgpr " << Info->NumSGPR <<
'\n';
756 OS <<
"\t\t.amdgpu_num_vgpr " << Info->NumArchVGPR <<
'\n';
757 if (Info->NumAccVGPR)
758 OS <<
"\t\t.amdgpu_num_agpr " << Info->NumAccVGPR <<
'\n';
759 OS <<
"\t\t.amdgpu_private_segment_size " << Info->PrivateSegmentSize
763 OS <<
"\t\t.amdgpu_use " << Res->getName() <<
'\n';
765 OS <<
"\t\t.amdgpu_call " << Dst->getName() <<
'\n';
766 for (
StringRef TypeId : IndirectCallTypeIds)
767 OS <<
"\t\t.amdgpu_indirect_call \"" << TypeId <<
"\"\n";
769 OS <<
"\t\t.amdgpu_typeid \"" << TypeId <<
"\"\n";
770 OS <<
"\t.end_amdgpu_info\n\n";
791 W.setELFHeaderEFlags(getEFlags());
792 W.setOverrideABIVersion(
809void AMDGPUTargetELFStreamer::EmitNote(
813 auto &Context = S.getContext();
815 auto NameSZ = Name.size() + 1;
817 unsigned NoteFlags = 0;
827 S.emitValue(DescSZ, 4);
828 S.emitInt32(NoteType);
831 S.emitValueToAlignment(
Align(4), 0, 1, 0);
833 S.emitValueToAlignment(
Align(4), 0, 1, 0);
837unsigned AMDGPUTargetELFStreamer::getEFlags() {
842 return getEFlagsR600();
844 return getEFlagsAMDGCN();
848unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
854unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
855 assert(STI.getTargetTriple().isAMDGCN());
857 switch (STI.getTargetTriple().getOS()) {
862 return getEFlagsUnknownOS();
864 return getEFlagsAMDHSA();
866 return getEFlagsAMDPAL();
868 return getEFlagsMesa3D();
872unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
876 return getEFlagsV3();
879unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
883 return getEFlagsV6();
884 return getEFlagsV4();
887unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
890 return getEFlagsV3();
893unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
896 return getEFlagsV3();
899unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
900 unsigned EFlagsV3 = 0;
915unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
916 unsigned EFlagsV4 = 0;
923 case AMDGPU::TargetIDSetting::Unsupported:
926 case AMDGPU::TargetIDSetting::Any:
929 case AMDGPU::TargetIDSetting::Off:
932 case AMDGPU::TargetIDSetting::On:
938 case AMDGPU::TargetIDSetting::Unsupported:
941 case AMDGPU::TargetIDSetting::Any:
944 case AMDGPU::TargetIDSetting::Off:
947 case AMDGPU::TargetIDSetting::On:
955unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
956 unsigned Flags = getEFlagsV4();
961 case AMDGPU::GK_GFX9_GENERIC:
964 case AMDGPU::GK_GFX9_4_GENERIC:
967 case AMDGPU::GK_GFX10_1_GENERIC:
970 case AMDGPU::GK_GFX10_3_GENERIC:
973 case AMDGPU::GK_GFX11_GENERIC:
976 case AMDGPU::GK_GFX11_7_GENERIC:
979 case AMDGPU::GK_GFX12_GENERIC:
982 case AMDGPU::GK_GFX12_5_GENERIC:
985 case AMDGPU::GK_GFX13_GENERIC:
998 " - no ELF flag can represent this version!");
1023 auto *SymbolELF =
static_cast<MCSymbolELF *
>(Symbol);
1026 if (!SymbolELF->isBindingSet())
1029 if (SymbolELF->declareCommon(
Size, Alignment)) {
1031 " redeclared as different type");
1042 auto *DescBegin = Context.createTempSymbol();
1043 auto *DescEnd = Context.createTempSymbol();
1065 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
1068 std::string HSAMetadataString;
1074 auto *DescBegin = Context.createTempSymbol();
1075 auto *DescEnd = Context.createTempSymbol();
1090 const uint32_t Encoded_s_code_end = 0xbf9f0000;
1091 const uint32_t Encoded_s_nop = 0xbf800000;
1092 uint32_t Encoded_pad = Encoded_s_code_end;
1102 Encoded_pad = Encoded_s_nop;
1109 for (
unsigned I = 0;
I < FillSize;
I += 4)
1119 const MCExpr *ReserveFlatScr) {
1121 auto &Context = Streamer.getContext();
1123 auto *KernelCodeSymbol =
1125 auto *KernelDescriptorSymbol =
static_cast<MCSymbolELF *
>(
1126 Context.getOrCreateSymbol(
Twine(KernelName) +
Twine(
".kd")));
1130 KernelDescriptorSymbol->
setBinding(KernelCodeSymbol->getBinding());
1131 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
1132 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
1135 KernelDescriptorSymbol->setSize(
1143 Streamer.emitLabel(KernelDescriptorSymbol);
1154 Streamer.emitInt8(0u);
1167 Streamer.emitInt8(0u);
1180 Streamer.emitInt8(0u);
1192 return StrTab.
add(Str);
1222 if (Info->UsesFlatScratch)
1224 if (Info->HasDynStack)
1231 if (Info->NumAccVGPR)
1234 Info->PrivateSegmentSize);
1241 for (
StringRef TypeId : IndirectCallTypeIds) {
1243 getOrAddString(TypeId));
1249 if (!StrTab.
empty()) {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
#define PRINT_RES_INFO(ARG)
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
Remove Loads Into Fake Uses
verify safepoint Safepoint IR Verifier
void emitAMDGPUInfo(const AMDGPU::InfoSectionData &Data) override
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitDirectiveAMDGCNTarget() override
void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitDirectiveAMDGCNTarget() override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
MCELFStreamer & getStreamer()
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
void emitAMDGPUInfo(const AMDGPU::InfoSectionData &Data) override
bool EmitISAVersion() override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
AMDGPUTargetStreamer(MCStreamer &S)
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
const std::optional< AMDGPU::TargetID > & getTargetID() const
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
unsigned CodeObjectVersion
Represent a constant reference to an array (0 or more elements consecutively in memory),...
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
This class is intended to be used as a base class for asm properties and features specific to the tar...
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCAsmInfo & getAsmInfo() const
ELFObjectWriter & getWriter()
void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc()) override
Emit a label for Symbol into the current section.
Base class for the full range of assembler expressions which are needed for parsing.
void emitBytes(StringRef Data) override
Emit the bytes in Data into the output.
This represents a section on linux, lots of unix variants and some bare metal systems.
Streaming machine code generation interface.
virtual bool popSection()
Restore the current and previous section from the section stack.
MCContext & getContext() const
void emitValue(const MCExpr *Value, unsigned Size, SMLoc Loc=SMLoc())
virtual void emitValueToAlignment(Align Alignment, int64_t Fill=0, uint8_t FillLen=1, unsigned MaxBytesToEmit=0)
Emit some number of copies of Value until the byte alignment ByteAlignment is reached.
void pushSection()
Save the current and previous section on the section stack.
virtual void switchSection(MCSection *Section, uint32_t Subsec=0)
Set the current section where code is being emitted to Section.
void emitInt32(uint64_t Value)
void emitInt8(uint64_t Value)
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
LLVM_ABI void setBinding(unsigned Binding) const
LLVM_ABI void setType(unsigned Type) const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
StringRef getName() const
getName - Get the symbol name.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Represent a constant reference to a string, i.e.
Utility for building string tables with deduplicated suffixes.
LLVM_ABI void finalizeInOrder()
Finalize the string table without reordering it.
LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority=0)
Add a string to the builder.
LLVM_ABI void write(raw_ostream &OS) const
ArchType getArch() const
Get the parsed architecture type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
LLVM_ABI void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
LLVM_ABI void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
LLVM_ABI bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static constexpr unsigned GFX12_5
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
static constexpr unsigned GFX13
static constexpr unsigned GFX11_7
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
LLVM_ABI StringRef getArchNameR600(GPUKind AK)
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
GPUKind
GPU kinds supported by the AMDGPU target.
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_ABI GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
InfoKind
Entry kind values for the .amdgpu.info section.
@ INFO_INDIRECT_CALL
Indirect call edge: the function contains an indirect call whose callee is expected to match the type...
@ INFO_FLAGS
Bitfield of FuncInfoFlags properties for the function. [u32].
@ INFO_FUNC
Opens a new function scope.
@ INFO_NUM_SGPR
Number of SGPRs explicitly used by the function. [u32].
@ INFO_NUM_VGPR
Number of architectural VGPRs used by the function. [u32].
@ INFO_CALL
Direct call edge: the function calls the callee identified by the 8-byte relocated symbol.
@ INFO_NUM_AGPR
Number of accumulator VGPRs (AGPRs) used by the function. [u32].
@ INFO_TYPEID
Function type ID: tags an address-taken function with a type-ID string (at the given ....
@ INFO_PRIVATE_SEGMENT_SIZE
Private (scratch) memory size in bytes required by the function. [u32].
@ INFO_USE
Dependency edge: the function uses the resource identified by the 8-byte relocated symbol (e....
LLVM_ABI StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
LLVM_ABI GPUKind parseArchR600(StringRef CPU)
@ EF_AMDGPU_GENERIC_VERSION_MAX
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_V3
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_V3
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr std::underlying_type_t< Enum > to_underlying(Enum E)
Returns underlying integer value of an enum.
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
uint32_t group_segment_fixed_size
uint32_t compute_pgm_rsrc1
uint32_t private_segment_fixed_size
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3
int64_t kernel_code_entry_byte_offset