LLVM 20.0.0git
AMDGPUTargetStreamer.cpp
Go to the documentation of this file.
1//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
14#include "AMDGPUMCExpr.h"
16#include "AMDGPUPTNote.h"
21#include "llvm/MC/MCAssembler.h"
22#include "llvm/MC/MCContext.h"
33
34using namespace llvm;
35using namespace llvm::AMDGPU;
36
37//===----------------------------------------------------------------------===//
38// AMDGPUTargetStreamer
39//===----------------------------------------------------------------------===//
40
42 ForceGenericVersion("amdgpu-force-generic-version",
43 cl::desc("Force a specific generic_v<N> flag to be "
44 "added. For testing purposes only."),
46
48 msgpack::Document HSAMetadataDoc;
49 if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
50 return false;
51 return EmitHSAMetadata(HSAMetadataDoc, false);
52}
53
56
57 // clang-format off
58 switch (ElfMach) {
59 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
60 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
70 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
125 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
126 default: AK = GK_NONE; break;
127 }
128 // clang-format on
129
130 StringRef GPUName = getArchNameAMDGCN(AK);
131 if (GPUName != "")
132 return GPUName;
133 return getArchNameR600(AK);
134}
135
138 if (AK == AMDGPU::GPUKind::GK_NONE)
139 AK = parseArchR600(GPU);
140
141 // clang-format off
142 switch (AK) {
210 }
211 // clang-format on
212
213 llvm_unreachable("unknown GPU");
214}
215
216//===----------------------------------------------------------------------===//
217// AMDGPUTargetAsmStreamer
218//===----------------------------------------------------------------------===//
219
222 : AMDGPUTargetStreamer(S), OS(OS) { }
223
224// A hook for emitting stuff at the end.
225// We use it for emitting the accumulated PAL metadata as directives.
226// The PAL metadata is reset after it is emitted.
228 std::string S;
230 OS << S;
231
232 // Reset the pal metadata so its data will not affect a compilation that
233 // reuses this object.
235}
236
238 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
239}
240
242 unsigned COV) {
244 OS << "\t.amdhsa_code_object_version " << COV << '\n';
245}
246
248 auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
249 const MCAsmInfo *MAI) {
251 };
252
253 OS << "\t.amd_kernel_code_t\n";
254 Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);
255 OS << "\t.end_amd_kernel_code_t\n";
256}
257
259 unsigned Type) {
260 switch (Type) {
261 default: llvm_unreachable("Invalid AMDGPU symbol type");
263 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
264 break;
265 }
266}
267
269 Align Alignment) {
270 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
271 << Alignment.value() << '\n';
272}
273
275 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
276 return true;
277}
278
280 msgpack::Document &HSAMetadataDoc, bool Strict) {
282 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
283 return false;
284
285 std::string HSAMetadataString;
286 raw_string_ostream StrOS(HSAMetadataString);
287 HSAMetadataDoc.toYAML(StrOS);
288
289 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
290 OS << StrOS.str() << '\n';
291 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
292 return true;
293}
294
296 const MCSubtargetInfo &STI, bool TrapEnabled) {
297 OS << (TrapEnabled ? "\ts_trap 2" : "\ts_endpgm")
298 << " ; Kernarg preload header. Trap with incompatible firmware that "
299 "doesn't support preloading kernel arguments.\n";
300 OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
301 return true;
302}
303
305 const uint32_t Encoded_s_code_end = 0xbf9f0000;
306 const uint32_t Encoded_s_nop = 0xbf800000;
307 uint32_t Encoded_pad = Encoded_s_code_end;
308
309 // Instruction cache line size in bytes.
310 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
311 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
312
313 // Extra padding amount in bytes to support prefetch mode 3.
314 unsigned FillSize = 3 * CacheLineSize;
315
316 if (AMDGPU::isGFX90A(STI)) {
317 Encoded_pad = Encoded_s_nop;
318 FillSize = 16 * CacheLineSize;
319 }
320
321 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
322 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
323 return true;
324}
325
327 const MCSubtargetInfo &STI, StringRef KernelName,
328 const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
329 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
330 const MCExpr *ReserveFlatScr) {
331 IsaVersion IVersion = getIsaVersion(STI.getCPU());
332 const MCAsmInfo *MAI = getContext().getAsmInfo();
333
334 OS << "\t.amdhsa_kernel " << KernelName << '\n';
335
336 auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
338 OS << "\t\t" << Directive << ' ';
339 const MCExpr *ShiftedAndMaskedExpr =
340 MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
341 const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext());
342 printAMDGPUMCExpr(New, OS, MAI);
343 OS << '\n';
344 };
345
346 auto EmitMCExpr = [&](const MCExpr *Value) {
348 printAMDGPUMCExpr(NewExpr, OS, MAI);
349 };
350
351 OS << "\t\t.amdhsa_group_segment_fixed_size ";
352 EmitMCExpr(KD.group_segment_fixed_size);
353 OS << '\n';
354
355 OS << "\t\t.amdhsa_private_segment_fixed_size ";
356 EmitMCExpr(KD.private_segment_fixed_size);
357 OS << '\n';
358
359 OS << "\t\t.amdhsa_kernarg_size ";
360 EmitMCExpr(KD.kernarg_size);
361 OS << '\n';
362
364 KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
365 amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
366
370 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
371 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
372 ".amdhsa_user_sgpr_private_segment_buffer");
374 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
375 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
376 ".amdhsa_user_sgpr_dispatch_ptr");
378 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
379 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
380 ".amdhsa_user_sgpr_queue_ptr");
382 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
383 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
384 ".amdhsa_user_sgpr_kernarg_segment_ptr");
386 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
387 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
388 ".amdhsa_user_sgpr_dispatch_id");
391 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
392 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
393 ".amdhsa_user_sgpr_flat_scratch_init");
394 if (hasKernargPreload(STI)) {
395 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
396 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
397 ".amdhsa_user_sgpr_kernarg_preload_length");
398 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
399 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
400 ".amdhsa_user_sgpr_kernarg_preload_offset");
401 }
404 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
405 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
406 ".amdhsa_user_sgpr_private_segment_size");
407 if (IVersion.Major >= 10)
409 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
410 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
411 ".amdhsa_wavefront_size32");
414 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
415 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
416 ".amdhsa_uses_dynamic_stack");
418 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
419 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
421 ? ".amdhsa_enable_private_segment"
422 : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
424 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
425 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
426 ".amdhsa_system_sgpr_workgroup_id_x");
428 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
429 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
430 ".amdhsa_system_sgpr_workgroup_id_y");
432 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
433 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
434 ".amdhsa_system_sgpr_workgroup_id_z");
436 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
437 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
438 ".amdhsa_system_sgpr_workgroup_info");
440 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
441 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
442 ".amdhsa_system_vgpr_workitem_id");
443
444 // These directives are required.
445 OS << "\t\t.amdhsa_next_free_vgpr ";
446 EmitMCExpr(NextVGPR);
447 OS << '\n';
448
449 OS << "\t\t.amdhsa_next_free_sgpr ";
450 EmitMCExpr(NextSGPR);
451 OS << '\n';
452
453 if (AMDGPU::isGFX90A(STI)) {
454 // MCExpr equivalent of taking the (accum_offset + 1) * 4.
455 const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
457 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
458 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
459 accum_bits = MCBinaryExpr::createAdd(
460 accum_bits, MCConstantExpr::create(1, getContext()), getContext());
461 accum_bits = MCBinaryExpr::createMul(
462 accum_bits, MCConstantExpr::create(4, getContext()), getContext());
463 OS << "\t\t.amdhsa_accum_offset ";
464 const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext());
465 printAMDGPUMCExpr(New, OS, MAI);
466 OS << '\n';
467 }
468
469 OS << "\t\t.amdhsa_reserve_vcc ";
470 EmitMCExpr(ReserveVCC);
471 OS << '\n';
472
473 if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
474 OS << "\t\t.amdhsa_reserve_flat_scratch ";
475 EmitMCExpr(ReserveFlatScr);
476 OS << '\n';
477 }
478
479 switch (CodeObjectVersion) {
480 default:
481 break;
484 if (getTargetID()->isXnackSupported())
485 OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
486 break;
487 }
488
490 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
491 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
492 ".amdhsa_float_round_mode_32");
494 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
495 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
496 ".amdhsa_float_round_mode_16_64");
498 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
499 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
500 ".amdhsa_float_denorm_mode_32");
502 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
503 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
504 ".amdhsa_float_denorm_mode_16_64");
505 if (IVersion.Major < 12) {
507 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
508 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
509 ".amdhsa_dx10_clamp");
511 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
512 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
513 ".amdhsa_ieee_mode");
514 }
515 if (IVersion.Major >= 9) {
517 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
518 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
519 ".amdhsa_fp16_overflow");
520 }
521 if (AMDGPU::isGFX90A(STI))
523 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
524 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
525 if (IVersion.Major >= 10) {
527 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
528 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
529 ".amdhsa_workgroup_processor_mode");
531 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
532 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
533 ".amdhsa_memory_ordered");
535 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
536 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
537 ".amdhsa_forward_progress");
538 }
539 if (IVersion.Major >= 10 && IVersion.Major < 12) {
541 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
542 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
543 ".amdhsa_shared_vgpr_count");
544 }
545 if (IVersion.Major >= 12) {
547 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
548 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
549 ".amdhsa_round_robin_scheduling");
550 }
553 amdhsa::
554 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
555 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
556 ".amdhsa_exception_fp_ieee_invalid_op");
559 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
560 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
561 ".amdhsa_exception_fp_denorm_src");
564 amdhsa::
565 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
566 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
567 ".amdhsa_exception_fp_ieee_div_zero");
570 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
571 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
572 ".amdhsa_exception_fp_ieee_overflow");
575 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
576 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
577 ".amdhsa_exception_fp_ieee_underflow");
580 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
581 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
582 ".amdhsa_exception_fp_ieee_inexact");
585 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
586 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
587 ".amdhsa_exception_int_div_zero");
588
589 OS << "\t.end_amdhsa_kernel\n";
590}
591
592//===----------------------------------------------------------------------===//
593// AMDGPUTargetELFStreamer
594//===----------------------------------------------------------------------===//
595
597 const MCSubtargetInfo &STI)
598 : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
599
601 return static_cast<MCELFStreamer &>(Streamer);
602}
603
604// A hook for emitting stuff at the end.
605// We use it for emitting the accumulated PAL metadata as a .note record.
606// The PAL metadata is reset after it is emitted.
609 W.setELFHeaderEFlags(getEFlags());
610 W.setOverrideABIVersion(
612
613 std::string Blob;
614 const char *Vendor = getPALMetadata()->getVendor();
615 unsigned Type = getPALMetadata()->getType();
616 getPALMetadata()->toBlob(Type, Blob);
617 if (Blob.empty())
618 return;
619 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
620 [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
621
622 // Reset the pal metadata so its data will not affect a compilation that
623 // reuses this object.
625}
626
627void AMDGPUTargetELFStreamer::EmitNote(
628 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
629 function_ref<void(MCELFStreamer &)> EmitDesc) {
630 auto &S = getStreamer();
631 auto &Context = S.getContext();
632
633 auto NameSZ = Name.size() + 1;
634
635 unsigned NoteFlags = 0;
636 // TODO Apparently, this is currently needed for OpenCL as mentioned in
637 // https://reviews.llvm.org/D74995
638 if (isHsaAbi(STI))
639 NoteFlags = ELF::SHF_ALLOC;
640
641 S.pushSection();
642 S.switchSection(
643 Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
644 S.emitInt32(NameSZ); // namesz
645 S.emitValue(DescSZ, 4); // descz
646 S.emitInt32(NoteType); // type
647 S.emitBytes(Name); // name
648 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
649 EmitDesc(S); // desc
650 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
651 S.popSection();
652}
653
654unsigned AMDGPUTargetELFStreamer::getEFlags() {
655 switch (STI.getTargetTriple().getArch()) {
656 default:
657 llvm_unreachable("Unsupported Arch");
658 case Triple::r600:
659 return getEFlagsR600();
660 case Triple::amdgcn:
661 return getEFlagsAMDGCN();
662 }
663}
664
// Returns the e_flags value for the r600 case: the result of getElfMach()
// for the subtarget's CPU name, with no additional bits.
// NOTE(review): the embedded numbering jumps from 665 to 667, so one source
// line is absent from this listing - confirm against the upstream file.
665unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
667
668 return getElfMach(STI.getCPU());
669}
670
// Dispatches on the OS component of the subtarget's triple and returns the
// e_flags produced by the matching helper: AMDHSA, AMDPAL and Mesa3D each
// have a dedicated helper, and the default path uses getEFlagsUnknownOS().
// NOTE(review): embedded line numbers 672 and 678 are absent from this
// listing - confirm the missing statements against the upstream file.
671unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
673
674 switch (STI.getTargetTriple().getOS()) {
675 default:
676 // TODO: Why are some tests have "mingw" listed as OS?
677 // llvm_unreachable("Unsupported OS");
679 return getEFlagsUnknownOS();
680 case Triple::AMDHSA:
681 return getEFlagsAMDHSA();
682 case Triple::AMDPAL:
683 return getEFlagsAMDPAL();
684 case Triple::Mesa3D:
685 return getEFlagsMesa3D();
686 }
687}
688
689unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
690 // TODO: Why are some tests have "mingw" listed as OS?
691 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
692
693 return getEFlagsV3();
694}
695
696unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
697 assert(isHsaAbi(STI));
698
699 if (CodeObjectVersion >= 6)
700 return getEFlagsV6();
701 return getEFlagsV4();
702}
703
// Returns the e_flags value for the AMDPAL OS, which is whatever
// getEFlagsV3() computes.
// NOTE(review): embedded line number 705 is absent from this listing -
// confirm the missing statement against the upstream file.
704unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
706
707 return getEFlagsV3();
708}
709
// Returns the e_flags value for the Mesa3D OS, which is whatever
// getEFlagsV3() computes.
// NOTE(review): embedded line number 711 is absent from this listing -
// confirm the missing statement against the upstream file.
710unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
712
713 return getEFlagsV3();
714}
715
// Builds the "V3" e_flags value. Starts from zero, ORs in the machine value
// returned by getElfMach() for the subtarget's CPU, then tests the target
// ID's xnack and sramecc state (isXnackOnOrAny / isSramEccOnOrAny).
// NOTE(review): the statements guarded by the two `if`s (embedded lines 724
// and 727) are absent from this listing; presumably each ORs a feature bit
// into EFlagsV3 - confirm against the upstream file.
716unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
717 unsigned EFlagsV3 = 0;
718
719 // mach.
720 EFlagsV3 |= getElfMach(STI.getCPU());
721
722 // xnack.
723 if (getTargetID()->isXnackOnOrAny())
725 // sramecc.
726 if (getTargetID()->isSramEccOnOrAny())
728
729 return EFlagsV3;
730}
731
// Builds the "V4" e_flags value. Starts from zero, ORs in the machine value
// returned by getElfMach() for the subtarget's CPU, then switches on the
// target ID's xnack setting and on its sramecc setting before returning the
// accumulated flags.
// NOTE(review): every case label and case body of both switches is absent
// from this listing (only the `break;` lines survive); presumably each case
// ORs the corresponding feature value into EFlagsV4 - confirm against the
// upstream file.
732unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
733 unsigned EFlagsV4 = 0;
734
735 // mach.
736 EFlagsV4 |= getElfMach(STI.getCPU());
737
738 // xnack.
739 switch (getTargetID()->getXnackSetting()) {
742 break;
745 break;
748 break;
751 break;
752 }
753 // sramecc.
754 switch (getTargetID()->getSramEccSetting()) {
757 break;
760 break;
763 break;
766 break;
767 }
768
769 return EFlagsV4;
770}
771
// Builds the "V6" e_flags value on top of getEFlagsV4().
//
// The generic version starts as the value of ForceGenericVersion. When that
// is zero, a switch on parseArchAMDGCN() of the subtarget's CPU may choose a
// version instead; the default case leaves it unchanged. A non-zero version
// can end in report_fatal_error() with a "Cannot encode generic code object
// version" message. The function returns Flags.
// NOTE(review): the case labels/bodies of the switch, the condition guarding
// report_fatal_error (embedded line 800) and the statement after it (804) are
// absent from this listing; presumably they range-check Version and encode
// it into Flags - confirm against the upstream file.
772unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
773 unsigned Flags = getEFlagsV4();
774
775 unsigned Version = ForceGenericVersion;
776 if (!Version) {
777 switch (parseArchAMDGCN(STI.getCPU())) {
780 break;
783 break;
786 break;
789 break;
792 break;
793 default:
794 break;
795 }
796 }
797
798 // Versions start at 1.
799 if (Version) {
801 report_fatal_error("Cannot encode generic code object version " +
802 Twine(Version) +
803 " - no ELF flag can represent this version!");
805 }
806
807 return Flags;
808}
809
811
814 OS.pushSection();
815 Header.EmitKernelCodeT(OS, getContext());
816 OS.popSection();
817}
818
820 unsigned Type) {
821 MCSymbolELF *Symbol = cast<MCSymbolELF>(
822 getStreamer().getContext().getOrCreateSymbol(SymbolName));
823 Symbol->setType(Type);
824}
825
827 Align Alignment) {
828 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
829 SymbolELF->setType(ELF::STT_OBJECT);
830
831 if (!SymbolELF->isBindingSet())
832 SymbolELF->setBinding(ELF::STB_GLOBAL);
833
834 if (SymbolELF->declareCommon(Size, Alignment, true)) {
835 report_fatal_error("Symbol: " + Symbol->getName() +
836 " redeclared as different type");
837 }
838
839 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
841}
842
844 // Create two labels to mark the beginning and end of the desc field
845 // and a MCExpr to calculate the size of the desc field.
846 auto &Context = getContext();
847 auto *DescBegin = Context.createTempSymbol();
848 auto *DescEnd = Context.createTempSymbol();
849 auto *DescSZ = MCBinaryExpr::createSub(
850 MCSymbolRefExpr::create(DescEnd, Context),
851 MCSymbolRefExpr::create(DescBegin, Context), Context);
852
854 [&](MCELFStreamer &OS) {
855 OS.emitLabel(DescBegin);
856 OS.emitBytes(getTargetID()->toString());
857 OS.emitLabel(DescEnd);
858 });
859 return true;
860}
861
863 bool Strict) {
865 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
866 return false;
867
868 std::string HSAMetadataString;
869 HSAMetadataDoc.writeToBlob(HSAMetadataString);
870
871 // Create two labels to mark the beginning and end of the desc field
872 // and a MCExpr to calculate the size of the desc field.
873 auto &Context = getContext();
874 auto *DescBegin = Context.createTempSymbol();
875 auto *DescEnd = Context.createTempSymbol();
876 auto *DescSZ = MCBinaryExpr::createSub(
877 MCSymbolRefExpr::create(DescEnd, Context),
878 MCSymbolRefExpr::create(DescBegin, Context), Context);
879
881 [&](MCELFStreamer &OS) {
882 OS.emitLabel(DescBegin);
883 OS.emitBytes(HSAMetadataString);
884 OS.emitLabel(DescEnd);
885 });
886 return true;
887}
888
890 const MCSubtargetInfo &STI, bool TrapEnabled) {
891 const uint32_t Encoded_s_nop = 0xbf800000;
892 const uint32_t Encoded_s_trap = 0xbf920002;
893 const uint32_t Encoded_s_endpgm = 0xbf810000;
894 const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm;
896 OS.emitInt32(TrapInstr);
897 for (int i = 0; i < 63; ++i) {
898 OS.emitInt32(Encoded_s_nop);
899 }
900 return true;
901}
902
904 const uint32_t Encoded_s_code_end = 0xbf9f0000;
905 const uint32_t Encoded_s_nop = 0xbf800000;
906 uint32_t Encoded_pad = Encoded_s_code_end;
907
908 // Instruction cache line size in bytes.
909 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
910 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
911
912 // Extra padding amount in bytes to support prefetch mode 3.
913 unsigned FillSize = 3 * CacheLineSize;
914
915 if (AMDGPU::isGFX90A(STI)) {
916 Encoded_pad = Encoded_s_nop;
917 FillSize = 16 * CacheLineSize;
918 }
919
921 OS.pushSection();
922 OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
923 for (unsigned I = 0; I < FillSize; I += 4)
924 OS.emitInt32(Encoded_pad);
925 OS.popSection();
926 return true;
927}
928
930 const MCSubtargetInfo &STI, StringRef KernelName,
931 const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
932 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
933 const MCExpr *ReserveFlatScr) {
934 auto &Streamer = getStreamer();
935 auto &Context = Streamer.getContext();
936
937 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
938 Context.getOrCreateSymbol(Twine(KernelName)));
939 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
940 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
941
942 // Copy kernel descriptor symbol's binding, other and visibility from the
943 // kernel code symbol.
944 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
945 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
946 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
947 // Kernel descriptor symbol's type and size are fixed.
948 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
949 KernelDescriptorSymbol->setSize(
951
952 // The visibility of the kernel code symbol must be protected or less to allow
953 // static relocations from the kernel descriptor to be used.
954 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
955 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
956
957 Streamer.emitLabel(KernelDescriptorSymbol);
958 Streamer.emitValue(
959 KernelDescriptor.group_segment_fixed_size,
961 Streamer.emitValue(
962 KernelDescriptor.private_segment_fixed_size,
964 Streamer.emitValue(KernelDescriptor.kernarg_size,
966
967 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
968 Streamer.emitInt8(0u);
969
970 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
971 // expression being created is:
972 // (start of kernel code) - (start of kernel descriptor)
973 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
974 Streamer.emitValue(
976 MCSymbolRefExpr::create(KernelCodeSymbol,
978 MCSymbolRefExpr::create(KernelDescriptorSymbol,
979 MCSymbolRefExpr::VK_None, Context),
980 Context),
982 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
983 Streamer.emitInt8(0u);
984 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
986 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
988 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
990 Streamer.emitValue(
991 KernelDescriptor.kernel_code_properties,
993 Streamer.emitValue(KernelDescriptor.kernarg_preload,
995 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
996 Streamer.emitInt8(0u);
997}
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
This is a verifier for AMDGPU HSA metadata, which can verify both well-typed metadata and untyped met...
AMDGPU metadata definitions and in-memory representations.
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
std::string Name
uint64_t Size
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
verify safepoint Safepoint IR Verifier
raw_pwrite_stream & OS
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
const char * getVendor() const
void toBlob(unsigned Type, std::string &S)
void toString(std::string &S)
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:532
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:587
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:617
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:193
const MCAsmInfo * getAsmInfo() const
Definition: MCContext.h:412
ELFObjectWriter & getWriter()
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Streaming machine code generation interface.
Definition: MCStreamer.h:213
MCContext & getContext() const
Definition: MCStreamer.h:300
void emitValue(const MCExpr *Value, unsigned Size, SMLoc Loc=SMLoc())
Definition: MCStreamer.cpp:179
virtual void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc())
Emit a label for Symbol into the current section.
Definition: MCStreamer.cpp:414
void emitInt8(uint64_t Value)
Definition: MCStreamer.h:717
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
StringRef getCPU() const
unsigned getOther() const
void setVisibility(unsigned Visibility)
void setSize(const MCExpr *SS)
Definition: MCSymbolELF.h:23
bool isBindingSet() const
void setBinding(unsigned Binding) const
Definition: MCSymbolELF.cpp:43
unsigned getVisibility() const
unsigned getBinding() const
Definition: MCSymbolELF.cpp:66
void setType(unsigned Type) const
Definition: MCSymbolELF.cpp:94
void setOther(unsigned Other)
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void setIndex(uint32_t Value) const
Set the (implementation defined) index.
Definition: MCSymbol.h:321
bool declareCommon(uint64_t Size, Align Alignment, bool Target=false)
Declare this symbol as being 'common'.
Definition: MCSymbol.h:375
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:382
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:373
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
formatted_raw_ostream - A raw_ostream that wraps another one and keeps track of line and column posit...
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:679
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char NoteNameV2[]
Definition: AMDGPUPTNote.h:26
const char SectionName[]
Definition: AMDGPUPTNote.h:24
const char NoteNameV3[]
Definition: AMDGPUPTNote.h:27
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
Definition: TargetParser.h:35
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
GPUKind parseArchR600(StringRef CPU)
@ STV_PROTECTED
Definition: ELF.h:1372
@ STV_DEFAULT
Definition: ELF.h:1369
@ EF_AMDGPU_GENERIC_VERSION_MAX
Definition: ELF.h:883
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
Definition: ELF.h:860
@ EF_AMDGPU_MACH_AMDGCN_GFX703
Definition: ELF.h:772
@ EF_AMDGPU_MACH_AMDGCN_GFX1035
Definition: ELF.h:796
@ EF_AMDGPU_FEATURE_SRAMECC_V3
Definition: ELF.h:851
@ EF_AMDGPU_MACH_AMDGCN_GFX1031
Definition: ELF.h:790
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
Definition: ELF.h:881
@ EF_AMDGPU_MACH_R600_CAYMAN
Definition: ELF.h:754
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
Definition: ELF.h:871
@ EF_AMDGPU_MACH_AMDGCN_GFX704
Definition: ELF.h:773
@ EF_AMDGPU_MACH_AMDGCN_GFX902
Definition: ELF.h:780
@ EF_AMDGPU_MACH_AMDGCN_GFX810
Definition: ELF.h:778
@ EF_AMDGPU_MACH_AMDGCN_GFX1036
Definition: ELF.h:804
@ EF_AMDGPU_MACH_AMDGCN_GFX1102
Definition: ELF.h:806
@ EF_AMDGPU_MACH_R600_RV730
Definition: ELF.h:743
@ EF_AMDGPU_MACH_R600_RV710
Definition: ELF.h:742
@ EF_AMDGPU_MACH_AMDGCN_GFX908
Definition: ELF.h:783
@ EF_AMDGPU_MACH_AMDGCN_GFX1011
Definition: ELF.h:787
@ EF_AMDGPU_MACH_R600_CYPRESS
Definition: ELF.h:747
@ EF_AMDGPU_MACH_AMDGCN_GFX1032
Definition: ELF.h:791
@ EF_AMDGPU_MACH_R600_R600
Definition: ELF.h:737
@ EF_AMDGPU_MACH_AMDGCN_GFX940
Definition: ELF.h:799
@ EF_AMDGPU_MACH_AMDGCN_GFX941
Definition: ELF.h:810
@ EF_AMDGPU_MACH_R600_TURKS
Definition: ELF.h:755
@ EF_AMDGPU_MACH_R600_JUNIPER
Definition: ELF.h:748
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
Definition: ELF.h:875
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
Definition: ELF.h:858
@ EF_AMDGPU_MACH_AMDGCN_GFX601
Definition: ELF.h:768
@ EF_AMDGPU_MACH_AMDGCN_GFX942
Definition: ELF.h:811
@ EF_AMDGPU_MACH_AMDGCN_GFX1152
Definition: ELF.h:820
@ EF_AMDGPU_MACH_R600_R630
Definition: ELF.h:738
@ EF_AMDGPU_MACH_R600_REDWOOD
Definition: ELF.h:749
@ EF_AMDGPU_MACH_R600_RV770
Definition: ELF.h:744
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
Definition: ELF.h:862
@ EF_AMDGPU_MACH_AMDGCN_GFX600
Definition: ELF.h:767
@ EF_AMDGPU_FEATURE_XNACK_V3
Definition: ELF.h:846
@ EF_AMDGPU_MACH_AMDGCN_GFX602
Definition: ELF.h:793
@ EF_AMDGPU_MACH_AMDGCN_GFX1101
Definition: ELF.h:805
@ EF_AMDGPU_MACH_AMDGCN_GFX1100
Definition: ELF.h:800
@ EF_AMDGPU_MACH_AMDGCN_GFX1033
Definition: ELF.h:792
@ EF_AMDGPU_MACH_AMDGCN_GFX801
Definition: ELF.h:775
@ EF_AMDGPU_MACH_AMDGCN_GFX705
Definition: ELF.h:794
@ EF_AMDGPU_MACH_AMDGCN_GFX1010
Definition: ELF.h:786
@ EF_AMDGPU_MACH_R600_RV670
Definition: ELF.h:740
@ EF_AMDGPU_MACH_AMDGCN_GFX701
Definition: ELF.h:770
@ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
Definition: ELF.h:818
@ EF_AMDGPU_MACH_AMDGCN_GFX1012
Definition: ELF.h:788
@ EF_AMDGPU_MACH_AMDGCN_GFX1151
Definition: ELF.h:809
@ EF_AMDGPU_MACH_AMDGCN_GFX1030
Definition: ELF.h:789
@ EF_AMDGPU_MACH_R600_CEDAR
Definition: ELF.h:746
@ EF_AMDGPU_MACH_AMDGCN_GFX1200
Definition: ELF.h:807
@ EF_AMDGPU_MACH_AMDGCN_GFX700
Definition: ELF.h:769
@ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
Definition: ELF.h:819
@ EF_AMDGPU_MACH_AMDGCN_GFX803
Definition: ELF.h:777
@ EF_AMDGPU_MACH_AMDGCN_GFX802
Definition: ELF.h:776
@ EF_AMDGPU_MACH_AMDGCN_GFX90C
Definition: ELF.h:785
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
Definition: ELF.h:864
@ EF_AMDGPU_MACH_AMDGCN_GFX900
Definition: ELF.h:779
@ EF_AMDGPU_MACH_AMDGCN_GFX909
Definition: ELF.h:784
@ EF_AMDGPU_MACH_AMDGCN_GFX906
Definition: ELF.h:782
@ EF_AMDGPU_MACH_NONE
Definition: ELF.h:732
@ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
Definition: ELF.h:816
@ EF_AMDGPU_MACH_AMDGCN_GFX1103
Definition: ELF.h:803
@ EF_AMDGPU_MACH_R600_CAICOS
Definition: ELF.h:753
@ EF_AMDGPU_MACH_AMDGCN_GFX90A
Definition: ELF.h:798
@ EF_AMDGPU_MACH_AMDGCN_GFX1034
Definition: ELF.h:797
@ EF_AMDGPU_MACH_AMDGCN_GFX1013
Definition: ELF.h:801
@ EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC
Definition: ELF.h:824
@ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
Definition: ELF.h:817
@ EF_AMDGPU_MACH_AMDGCN_GFX904
Definition: ELF.h:781
@ EF_AMDGPU_MACH_R600_RS880
Definition: ELF.h:739
@ EF_AMDGPU_MACH_AMDGCN_GFX805
Definition: ELF.h:795
@ EF_AMDGPU_MACH_AMDGCN_GFX1201
Definition: ELF.h:813
@ EF_AMDGPU_MACH_AMDGCN_GFX1150
Definition: ELF.h:802
@ EF_AMDGPU_MACH_R600_SUMO
Definition: ELF.h:750
@ EF_AMDGPU_MACH_R600_BARTS
Definition: ELF.h:752
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
Definition: ELF.h:873
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
Definition: ELF.h:877
@ EF_AMDGPU_MACH_AMDGCN_GFX702
Definition: ELF.h:771
@ SHT_NOTE
Definition: ELF.h:1095
@ NT_AMD_HSA_ISA_NAME
Definition: ELF.h:1903
@ STB_GLOBAL
Definition: ELF.h:1340
@ SHF_ALLOC
Definition: ELF.h:1186
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1365
@ STT_OBJECT
Definition: ELF.h:1352
@ SHN_AMDGPU_LDS
Definition: ELF.h:1893
@ NT_AMDGPU_METADATA
Definition: ELF.h:1910
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
Instruction set architecture version.
Definition: TargetParser.h:127
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85