LLVM 20.0.0git
AMDGPUTargetStreamer.cpp
Go to the documentation of this file.
1//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
14#include "AMDGPUMCExpr.h"
16#include "AMDGPUPTNote.h"
21#include "llvm/MC/MCAssembler.h"
22#include "llvm/MC/MCContext.h"
32
33using namespace llvm;
34using namespace llvm::AMDGPU;
35
36//===----------------------------------------------------------------------===//
37// AMDGPUTargetStreamer
38//===----------------------------------------------------------------------===//
39
41 ForceGenericVersion("amdgpu-force-generic-version",
42 cl::desc("Force a specific generic_v<N> flag to be "
43 "added. For testing purposes only."),
45
47 msgpack::Document HSAMetadataDoc;
48 if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
49 return false;
50 return EmitHSAMetadata(HSAMetadataDoc, false);
51}
52
55
56 // clang-format off
57 switch (ElfMach) {
58 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
59 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
69 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
127 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
128 default: AK = GK_NONE; break;
129 }
130 // clang-format on
131
132 StringRef GPUName = getArchNameAMDGCN(AK);
133 if (GPUName != "")
134 return GPUName;
135 return getArchNameR600(AK);
136}
137
140 if (AK == AMDGPU::GPUKind::GK_NONE)
141 AK = parseArchR600(GPU);
142
143 // clang-format off
144 switch (AK) {
215 }
216 // clang-format on
217
218 llvm_unreachable("unknown GPU");
219}
220
221//===----------------------------------------------------------------------===//
222// AMDGPUTargetAsmStreamer
223//===----------------------------------------------------------------------===//
224
227 : AMDGPUTargetStreamer(S), OS(OS) { }
228
229// A hook for emitting stuff at the end.
230// We use it for emitting the accumulated PAL metadata as directives.
231// The PAL metadata is reset after it is emitted.
233 std::string S;
235 OS << S;
236
237 // Reset the PAL metadata so its data will not affect a compilation that
238 // reuses this object.
240}
241
243 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
244}
245
247 unsigned COV) {
249 OS << "\t.amdhsa_code_object_version " << COV << '\n';
250}
251
253 auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
254 const MCAsmInfo *MAI) {
256 };
257
258 OS << "\t.amd_kernel_code_t\n";
259 Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);
260 OS << "\t.end_amd_kernel_code_t\n";
261}
262
264 unsigned Type) {
265 switch (Type) {
266 default: llvm_unreachable("Invalid AMDGPU symbol type");
268 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
269 break;
270 }
271}
272
274 Align Alignment) {
275 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
276 << Alignment.value() << '\n';
277}
278
280 const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
281 const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize,
282 const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch,
283 const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion,
284 const MCSymbol *HasIndirectCall) {
285#define PRINT_RES_INFO(ARG) \
286 OS << "\t.set "; \
287 ARG->print(OS, getContext().getAsmInfo()); \
288 OS << ", "; \
289 ARG->getVariableValue()->print(OS, getContext().getAsmInfo()); \
290 Streamer.addBlankLine();
291
292 PRINT_RES_INFO(NumVGPR);
293 PRINT_RES_INFO(NumAGPR);
294 PRINT_RES_INFO(NumExplicitSGPR);
295 PRINT_RES_INFO(PrivateSegmentSize);
296 PRINT_RES_INFO(UsesVCC);
297 PRINT_RES_INFO(UsesFlatScratch);
298 PRINT_RES_INFO(HasDynamicallySizedStack);
299 PRINT_RES_INFO(HasRecursion);
300 PRINT_RES_INFO(HasIndirectCall);
301#undef PRINT_RES_INFO
302}
303
305 const MCSymbol *MaxAGPR,
306 const MCSymbol *MaxSGPR) {
307#define PRINT_RES_INFO(ARG) \
308 OS << "\t.set "; \
309 ARG->print(OS, getContext().getAsmInfo()); \
310 OS << ", "; \
311 ARG->getVariableValue()->print(OS, getContext().getAsmInfo()); \
312 Streamer.addBlankLine();
313
314 PRINT_RES_INFO(MaxVGPR);
315 PRINT_RES_INFO(MaxAGPR);
316 PRINT_RES_INFO(MaxSGPR);
317#undef PRINT_RES_INFO
318}
319
321 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
322 return true;
323}
324
326 msgpack::Document &HSAMetadataDoc, bool Strict) {
328 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
329 return false;
330
331 std::string HSAMetadataString;
332 raw_string_ostream StrOS(HSAMetadataString);
333 HSAMetadataDoc.toYAML(StrOS);
334
335 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
336 OS << StrOS.str() << '\n';
337 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
338 return true;
339}
340
342 const MCSubtargetInfo &STI, bool TrapEnabled) {
343 OS << (TrapEnabled ? "\ts_trap 2" : "\ts_endpgm")
344 << " ; Kernarg preload header. Trap with incompatible firmware that "
345 "doesn't support preloading kernel arguments.\n";
346 OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
347 return true;
348}
349
351 const uint32_t Encoded_s_code_end = 0xbf9f0000;
352 const uint32_t Encoded_s_nop = 0xbf800000;
353 uint32_t Encoded_pad = Encoded_s_code_end;
354
355 // Instruction cache line size in bytes.
356 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
357 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
358
359 // Extra padding amount in bytes to support prefetch mode 3.
360 unsigned FillSize = 3 * CacheLineSize;
361
362 if (AMDGPU::isGFX90A(STI)) {
363 Encoded_pad = Encoded_s_nop;
364 FillSize = 16 * CacheLineSize;
365 }
366
367 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
368 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
369 return true;
370}
371
373 const MCSubtargetInfo &STI, StringRef KernelName,
374 const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
375 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
376 const MCExpr *ReserveFlatScr) {
377 IsaVersion IVersion = getIsaVersion(STI.getCPU());
378 const MCAsmInfo *MAI = getContext().getAsmInfo();
379
380 OS << "\t.amdhsa_kernel " << KernelName << '\n';
381
382 auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
384 OS << "\t\t" << Directive << ' ';
385 const MCExpr *ShiftedAndMaskedExpr =
386 MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
387 const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext());
388 printAMDGPUMCExpr(New, OS, MAI);
389 OS << '\n';
390 };
391
392 auto EmitMCExpr = [&](const MCExpr *Value) {
394 printAMDGPUMCExpr(NewExpr, OS, MAI);
395 };
396
397 OS << "\t\t.amdhsa_group_segment_fixed_size ";
398 EmitMCExpr(KD.group_segment_fixed_size);
399 OS << '\n';
400
401 OS << "\t\t.amdhsa_private_segment_fixed_size ";
402 EmitMCExpr(KD.private_segment_fixed_size);
403 OS << '\n';
404
405 OS << "\t\t.amdhsa_kernarg_size ";
406 EmitMCExpr(KD.kernarg_size);
407 OS << '\n';
408
410 KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
411 amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
412
416 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
417 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
418 ".amdhsa_user_sgpr_private_segment_buffer");
420 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
421 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
422 ".amdhsa_user_sgpr_dispatch_ptr");
424 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
425 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
426 ".amdhsa_user_sgpr_queue_ptr");
428 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
429 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
430 ".amdhsa_user_sgpr_kernarg_segment_ptr");
432 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
433 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
434 ".amdhsa_user_sgpr_dispatch_id");
437 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
438 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
439 ".amdhsa_user_sgpr_flat_scratch_init");
440 if (hasKernargPreload(STI)) {
441 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
442 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
443 ".amdhsa_user_sgpr_kernarg_preload_length");
444 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
445 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
446 ".amdhsa_user_sgpr_kernarg_preload_offset");
447 }
450 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
451 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
452 ".amdhsa_user_sgpr_private_segment_size");
453 if (IVersion.Major >= 10)
455 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
456 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
457 ".amdhsa_wavefront_size32");
460 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
461 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
462 ".amdhsa_uses_dynamic_stack");
464 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
465 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
467 ? ".amdhsa_enable_private_segment"
468 : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
470 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
471 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
472 ".amdhsa_system_sgpr_workgroup_id_x");
474 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
475 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
476 ".amdhsa_system_sgpr_workgroup_id_y");
478 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
479 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
480 ".amdhsa_system_sgpr_workgroup_id_z");
482 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
483 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
484 ".amdhsa_system_sgpr_workgroup_info");
486 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
487 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
488 ".amdhsa_system_vgpr_workitem_id");
489
490 // These directives are required.
491 OS << "\t\t.amdhsa_next_free_vgpr ";
492 EmitMCExpr(NextVGPR);
493 OS << '\n';
494
495 OS << "\t\t.amdhsa_next_free_sgpr ";
496 EmitMCExpr(NextSGPR);
497 OS << '\n';
498
499 if (AMDGPU::isGFX90A(STI)) {
500 // MCExpr equivalent of taking the (accum_offset + 1) * 4.
501 const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
503 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
504 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
505 accum_bits = MCBinaryExpr::createAdd(
506 accum_bits, MCConstantExpr::create(1, getContext()), getContext());
507 accum_bits = MCBinaryExpr::createMul(
508 accum_bits, MCConstantExpr::create(4, getContext()), getContext());
509 OS << "\t\t.amdhsa_accum_offset ";
510 const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext());
511 printAMDGPUMCExpr(New, OS, MAI);
512 OS << '\n';
513 }
514
515 OS << "\t\t.amdhsa_reserve_vcc ";
516 EmitMCExpr(ReserveVCC);
517 OS << '\n';
518
519 if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
520 OS << "\t\t.amdhsa_reserve_flat_scratch ";
521 EmitMCExpr(ReserveFlatScr);
522 OS << '\n';
523 }
524
525 switch (CodeObjectVersion) {
526 default:
527 break;
530 if (getTargetID()->isXnackSupported())
531 OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
532 break;
533 }
534
536 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
537 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
538 ".amdhsa_float_round_mode_32");
540 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
541 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
542 ".amdhsa_float_round_mode_16_64");
544 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
545 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
546 ".amdhsa_float_denorm_mode_32");
548 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
549 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
550 ".amdhsa_float_denorm_mode_16_64");
551 if (IVersion.Major < 12) {
553 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
554 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
555 ".amdhsa_dx10_clamp");
557 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
558 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
559 ".amdhsa_ieee_mode");
560 }
561 if (IVersion.Major >= 9) {
563 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
564 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
565 ".amdhsa_fp16_overflow");
566 }
567 if (AMDGPU::isGFX90A(STI))
569 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
570 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
571 if (IVersion.Major >= 10) {
573 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
574 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
575 ".amdhsa_workgroup_processor_mode");
577 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
578 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
579 ".amdhsa_memory_ordered");
581 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
582 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
583 ".amdhsa_forward_progress");
584 }
585 if (IVersion.Major >= 10 && IVersion.Major < 12) {
587 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
588 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
589 ".amdhsa_shared_vgpr_count");
590 }
591 if (IVersion.Major >= 12) {
593 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
594 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
595 ".amdhsa_round_robin_scheduling");
596 }
599 amdhsa::
600 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
601 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
602 ".amdhsa_exception_fp_ieee_invalid_op");
605 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
606 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
607 ".amdhsa_exception_fp_denorm_src");
610 amdhsa::
611 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
612 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
613 ".amdhsa_exception_fp_ieee_div_zero");
616 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
617 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
618 ".amdhsa_exception_fp_ieee_overflow");
621 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
622 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
623 ".amdhsa_exception_fp_ieee_underflow");
626 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
627 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
628 ".amdhsa_exception_fp_ieee_inexact");
631 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
632 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
633 ".amdhsa_exception_int_div_zero");
634
635 OS << "\t.end_amdhsa_kernel\n";
636}
637
638//===----------------------------------------------------------------------===//
639// AMDGPUTargetELFStreamer
640//===----------------------------------------------------------------------===//
641
643 const MCSubtargetInfo &STI)
644 : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
645
647 return static_cast<MCELFStreamer &>(Streamer);
648}
649
650// A hook for emitting stuff at the end.
651// We use it for emitting the accumulated PAL metadata as a .note record.
652// The PAL metadata is reset after it is emitted.
655 W.setELFHeaderEFlags(getEFlags());
656 W.setOverrideABIVersion(
658
659 std::string Blob;
660 const char *Vendor = getPALMetadata()->getVendor();
661 unsigned Type = getPALMetadata()->getType();
662 getPALMetadata()->toBlob(Type, Blob);
663 if (Blob.empty())
664 return;
665 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
666 [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
667
668 // Reset the PAL metadata so its data will not affect a compilation that
669 // reuses this object.
671}
672
673void AMDGPUTargetELFStreamer::EmitNote(
674 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
675 function_ref<void(MCELFStreamer &)> EmitDesc) {
676 auto &S = getStreamer();
677 auto &Context = S.getContext();
678
679 auto NameSZ = Name.size() + 1;
680
681 unsigned NoteFlags = 0;
682 // TODO Apparently, this is currently needed for OpenCL as mentioned in
683 // https://reviews.llvm.org/D74995
684 if (isHsaAbi(STI))
685 NoteFlags = ELF::SHF_ALLOC;
686
687 S.pushSection();
688 S.switchSection(
689 Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
690 S.emitInt32(NameSZ); // namesz
691 S.emitValue(DescSZ, 4); // descz
692 S.emitInt32(NoteType); // type
693 S.emitBytes(Name); // name
694 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
695 EmitDesc(S); // desc
696 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
697 S.popSection();
698}
699
700unsigned AMDGPUTargetELFStreamer::getEFlags() {
701 switch (STI.getTargetTriple().getArch()) {
702 default:
703 llvm_unreachable("Unsupported Arch");
704 case Triple::r600:
705 return getEFlagsR600();
706 case Triple::amdgcn:
707 return getEFlagsAMDGCN();
708 }
709}
710
// ELF header flags for the r600 architecture: the value that getElfMach()
// returns for the subtarget's CPU name, with nothing else added.
// NOTE(review): one source line of this function is absent from this listing
// (the embedded numbering skips a line) - confirm against the real file.
711unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
713
714 return getElfMach(STI.getCPU());
715}
716
// ELF header flags for the amdgcn architecture. The choice is made on the OS
// component of the target triple: AMDHSA, AMDPAL and Mesa3D each have a
// dedicated helper, and every other OS takes the default branch and is
// handled by getEFlagsUnknownOS().
// NOTE(review): some source lines of this function are absent from this
// listing (the embedded numbering has gaps) - confirm against the real file.
717unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
719
720 switch (STI.getTargetTriple().getOS()) {
721 default:
722 // TODO: Why do some tests have "mingw" listed as OS?
723 // llvm_unreachable("Unsupported OS");
725 return getEFlagsUnknownOS();
726 case Triple::AMDHSA:
727 return getEFlagsAMDHSA();
728 case Triple::AMDPAL:
729 return getEFlagsAMDPAL();
730 case Triple::Mesa3D:
731 return getEFlagsMesa3D();
732 }
733}
734
735unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
736 // TODO: Why are some tests have "mingw" listed as OS?
737 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
738
739 return getEFlagsV3();
740}
741
742unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
743 assert(isHsaAbi(STI));
744
745 if (CodeObjectVersion >= 6)
746 return getEFlagsV6();
747 return getEFlagsV4();
748}
749
// ELF header flags for the AMDPAL OS: delegates to the V3 flag encoding.
// NOTE(review): one source line of this function is absent from this listing
// (the embedded numbering skips a line) - confirm against the real file.
750unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
752
753 return getEFlagsV3();
754}
755
// ELF header flags for the Mesa3D OS: delegates to the V3 flag encoding.
// NOTE(review): one source line of this function is absent from this listing
// (the embedded numbering skips a line) - confirm against the real file.
756unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
758
759 return getEFlagsV3();
760}
761
762unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
763 unsigned EFlagsV3 = 0;
764
765 // mach.
766 EFlagsV3 |= getElfMach(STI.getCPU());
767
768 // xnack.
769 if (getTargetID()->isXnackOnOrAny())
771 // sramecc.
772 if (getTargetID()->isSramEccOnOrAny())
774
775 return EFlagsV3;
776}
777
778unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
779 unsigned EFlagsV4 = 0;
780
781 // mach.
782 EFlagsV4 |= getElfMach(STI.getCPU());
783
784 // xnack.
785 switch (getTargetID()->getXnackSetting()) {
788 break;
791 break;
794 break;
797 break;
798 }
799 // sramecc.
800 switch (getTargetID()->getSramEccSetting()) {
803 break;
806 break;
809 break;
812 break;
813 }
814
815 return EFlagsV4;
816}
817
818unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
819 unsigned Flags = getEFlagsV4();
820
821 unsigned Version = ForceGenericVersion;
822 if (!Version) {
823 switch (parseArchAMDGCN(STI.getCPU())) {
826 break;
829 break;
832 break;
835 break;
838 break;
841 break;
842 default:
843 break;
844 }
845 }
846
847 // Versions start at 1.
848 if (Version) {
850 report_fatal_error("Cannot encode generic code object version " +
851 Twine(Version) +
852 " - no ELF flag can represent this version!");
854 }
855
856 return Flags;
857}
858
860
863 OS.pushSection();
864 Header.EmitKernelCodeT(OS, getContext());
865 OS.popSection();
866}
867
869 unsigned Type) {
870 MCSymbolELF *Symbol = cast<MCSymbolELF>(
871 getStreamer().getContext().getOrCreateSymbol(SymbolName));
872 Symbol->setType(Type);
873}
874
876 Align Alignment) {
877 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
878 SymbolELF->setType(ELF::STT_OBJECT);
879
880 if (!SymbolELF->isBindingSet())
881 SymbolELF->setBinding(ELF::STB_GLOBAL);
882
883 if (SymbolELF->declareCommon(Size, Alignment, true)) {
884 report_fatal_error("Symbol: " + Symbol->getName() +
885 " redeclared as different type");
886 }
887
888 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
890}
891
893 // Create two labels to mark the beginning and end of the desc field
894 // and a MCExpr to calculate the size of the desc field.
895 auto &Context = getContext();
896 auto *DescBegin = Context.createTempSymbol();
897 auto *DescEnd = Context.createTempSymbol();
898 auto *DescSZ = MCBinaryExpr::createSub(
899 MCSymbolRefExpr::create(DescEnd, Context),
900 MCSymbolRefExpr::create(DescBegin, Context), Context);
901
903 [&](MCELFStreamer &OS) {
904 OS.emitLabel(DescBegin);
905 OS.emitBytes(getTargetID()->toString());
906 OS.emitLabel(DescEnd);
907 });
908 return true;
909}
910
912 bool Strict) {
914 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
915 return false;
916
917 std::string HSAMetadataString;
918 HSAMetadataDoc.writeToBlob(HSAMetadataString);
919
920 // Create two labels to mark the beginning and end of the desc field
921 // and a MCExpr to calculate the size of the desc field.
922 auto &Context = getContext();
923 auto *DescBegin = Context.createTempSymbol();
924 auto *DescEnd = Context.createTempSymbol();
925 auto *DescSZ = MCBinaryExpr::createSub(
926 MCSymbolRefExpr::create(DescEnd, Context),
927 MCSymbolRefExpr::create(DescBegin, Context), Context);
928
930 [&](MCELFStreamer &OS) {
931 OS.emitLabel(DescBegin);
932 OS.emitBytes(HSAMetadataString);
933 OS.emitLabel(DescEnd);
934 });
935 return true;
936}
937
939 const MCSubtargetInfo &STI, bool TrapEnabled) {
940 const uint32_t Encoded_s_nop = 0xbf800000;
941 const uint32_t Encoded_s_trap = 0xbf920002;
942 const uint32_t Encoded_s_endpgm = 0xbf810000;
943 const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm;
945 OS.emitInt32(TrapInstr);
946 for (int i = 0; i < 63; ++i) {
947 OS.emitInt32(Encoded_s_nop);
948 }
949 return true;
950}
951
953 const uint32_t Encoded_s_code_end = 0xbf9f0000;
954 const uint32_t Encoded_s_nop = 0xbf800000;
955 uint32_t Encoded_pad = Encoded_s_code_end;
956
957 // Instruction cache line size in bytes.
958 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
959 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
960
961 // Extra padding amount in bytes to support prefetch mode 3.
962 unsigned FillSize = 3 * CacheLineSize;
963
964 if (AMDGPU::isGFX90A(STI)) {
965 Encoded_pad = Encoded_s_nop;
966 FillSize = 16 * CacheLineSize;
967 }
968
970 OS.pushSection();
971 OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
972 for (unsigned I = 0; I < FillSize; I += 4)
973 OS.emitInt32(Encoded_pad);
974 OS.popSection();
975 return true;
976}
977
979 const MCSubtargetInfo &STI, StringRef KernelName,
980 const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
981 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
982 const MCExpr *ReserveFlatScr) {
983 auto &Streamer = getStreamer();
984 auto &Context = Streamer.getContext();
985
986 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
987 Context.getOrCreateSymbol(Twine(KernelName)));
988 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
989 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
990
991 // Copy kernel descriptor symbol's binding, other and visibility from the
992 // kernel code symbol.
993 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
994 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
995 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
996 // Kernel descriptor symbol's type and size are fixed.
997 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
998 KernelDescriptorSymbol->setSize(
1000
1001 // The visibility of the kernel code symbol must be protected or less to allow
1002 // static relocations from the kernel descriptor to be used.
1003 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
1004 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
1005
1006 Streamer.emitLabel(KernelDescriptorSymbol);
1007 Streamer.emitValue(
1008 KernelDescriptor.group_segment_fixed_size,
1010 Streamer.emitValue(
1011 KernelDescriptor.private_segment_fixed_size,
1013 Streamer.emitValue(KernelDescriptor.kernarg_size,
1015
1016 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
1017 Streamer.emitInt8(0u);
1018
1019 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
1020 // expression being created is:
1021 // (start of kernel code) - (start of kernel descriptor)
1022 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
1023 Streamer.emitValue(
1025 MCSymbolRefExpr::create(KernelCodeSymbol,
1027 MCSymbolRefExpr::create(KernelDescriptorSymbol,
1028 MCSymbolRefExpr::VK_None, Context),
1029 Context),
1031 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
1032 Streamer.emitInt8(0u);
1033 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
1035 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
1037 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
1039 Streamer.emitValue(
1040 KernelDescriptor.kernel_code_properties,
1042 Streamer.emitValue(KernelDescriptor.kernarg_preload,
1044 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
1045 Streamer.emitInt8(0u);
1046}
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
This is a verifier for AMDGPU HSA metadata, which can verify both well-typed metadata and untyped met...
AMDGPU metadata definitions and in-memory representations.
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
#define PRINT_RES_INFO(ARG)
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
std::string Name
uint64_t Size
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
verify safepoint Safepoint IR Verifier
raw_pwrite_stream & OS
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
const char * getVendor() const
void toBlob(unsigned Type, std::string &S)
void toString(std::string &S)
void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) override
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR) override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:537
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:592
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:622
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:222
const MCAsmInfo * getAsmInfo() const
Definition: MCContext.h:412
ELFObjectWriter & getWriter()
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Streaming machine code generation interface.
Definition: MCStreamer.h:213
MCContext & getContext() const
Definition: MCStreamer.h:300
void emitValue(const MCExpr *Value, unsigned Size, SMLoc Loc=SMLoc())
Definition: MCStreamer.cpp:179
virtual void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc())
Emit a label for Symbol into the current section.
Definition: MCStreamer.cpp:420
void emitInt8(uint64_t Value)
Definition: MCStreamer.h:719
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
StringRef getCPU() const
unsigned getOther() const
void setVisibility(unsigned Visibility)
void setSize(const MCExpr *SS)
Definition: MCSymbolELF.h:23
bool isBindingSet() const
void setBinding(unsigned Binding) const
Definition: MCSymbolELF.cpp:43
unsigned getVisibility() const
unsigned getBinding() const
Definition: MCSymbolELF.cpp:66
void setType(unsigned Type) const
Definition: MCSymbolELF.cpp:94
void setOther(unsigned Other)
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void setIndex(uint32_t Value) const
Set the (implementation defined) index.
Definition: MCSymbol.h:321
bool declareCommon(uint64_t Size, Align Alignment, bool Target=false)
Declare this symbol as being 'common'.
Definition: MCSymbol.h:375
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:392
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:383
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
formatted_raw_ostream - A raw_ostream that wraps another one and keeps track of line and column posit...
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:679
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char NoteNameV2[]
Definition: AMDGPUPTNote.h:26
const char SectionName[]
Definition: AMDGPUPTNote.h:24
const char NoteNameV3[]
Definition: AMDGPUPTNote.h:27
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
Definition: TargetParser.h:35
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
GPUKind parseArchR600(StringRef CPU)
@ SHT_NOTE
Definition: ELF.h:1096
@ EF_AMDGPU_GENERIC_VERSION_MAX
Definition: ELF.h:884
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
Definition: ELF.h:861
@ EF_AMDGPU_MACH_AMDGCN_GFX703
Definition: ELF.h:772
@ EF_AMDGPU_MACH_AMDGCN_GFX1035
Definition: ELF.h:796
@ EF_AMDGPU_FEATURE_SRAMECC_V3
Definition: ELF.h:852
@ EF_AMDGPU_MACH_AMDGCN_GFX1031
Definition: ELF.h:790
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
Definition: ELF.h:882
@ EF_AMDGPU_MACH_R600_CAYMAN
Definition: ELF.h:754
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
Definition: ELF.h:872
@ EF_AMDGPU_MACH_AMDGCN_GFX704
Definition: ELF.h:773
@ EF_AMDGPU_MACH_AMDGCN_GFX902
Definition: ELF.h:780
@ EF_AMDGPU_MACH_AMDGCN_GFX810
Definition: ELF.h:778
@ EF_AMDGPU_MACH_AMDGCN_GFX950
Definition: ELF.h:814
@ EF_AMDGPU_MACH_AMDGCN_GFX1036
Definition: ELF.h:804
@ EF_AMDGPU_MACH_AMDGCN_GFX1102
Definition: ELF.h:806
@ EF_AMDGPU_MACH_R600_RV730
Definition: ELF.h:743
@ EF_AMDGPU_MACH_R600_RV710
Definition: ELF.h:742
@ EF_AMDGPU_MACH_AMDGCN_GFX908
Definition: ELF.h:783
@ EF_AMDGPU_MACH_AMDGCN_GFX1011
Definition: ELF.h:787
@ EF_AMDGPU_MACH_R600_CYPRESS
Definition: ELF.h:747
@ EF_AMDGPU_MACH_AMDGCN_GFX1032
Definition: ELF.h:791
@ EF_AMDGPU_MACH_R600_R600
Definition: ELF.h:737
@ EF_AMDGPU_MACH_AMDGCN_GFX940
Definition: ELF.h:799
@ EF_AMDGPU_MACH_AMDGCN_GFX941
Definition: ELF.h:810
@ EF_AMDGPU_MACH_R600_TURKS
Definition: ELF.h:755
@ EF_AMDGPU_MACH_R600_JUNIPER
Definition: ELF.h:748
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
Definition: ELF.h:876
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
Definition: ELF.h:859
@ EF_AMDGPU_MACH_AMDGCN_GFX601
Definition: ELF.h:768
@ EF_AMDGPU_MACH_AMDGCN_GFX942
Definition: ELF.h:811
@ EF_AMDGPU_MACH_AMDGCN_GFX1152
Definition: ELF.h:820
@ EF_AMDGPU_MACH_R600_R630
Definition: ELF.h:738
@ EF_AMDGPU_MACH_R600_REDWOOD
Definition: ELF.h:749
@ EF_AMDGPU_MACH_R600_RV770
Definition: ELF.h:744
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
Definition: ELF.h:863
@ EF_AMDGPU_MACH_AMDGCN_GFX600
Definition: ELF.h:767
@ EF_AMDGPU_FEATURE_XNACK_V3
Definition: ELF.h:847
@ EF_AMDGPU_MACH_AMDGCN_GFX602
Definition: ELF.h:793
@ EF_AMDGPU_MACH_AMDGCN_GFX1101
Definition: ELF.h:805
@ EF_AMDGPU_MACH_AMDGCN_GFX1100
Definition: ELF.h:800
@ EF_AMDGPU_MACH_AMDGCN_GFX1033
Definition: ELF.h:792
@ EF_AMDGPU_MACH_AMDGCN_GFX801
Definition: ELF.h:775
@ EF_AMDGPU_MACH_AMDGCN_GFX705
Definition: ELF.h:794
@ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC
Definition: ELF.h:825
@ EF_AMDGPU_MACH_AMDGCN_GFX1153
Definition: ELF.h:823
@ EF_AMDGPU_MACH_AMDGCN_GFX1010
Definition: ELF.h:786
@ EF_AMDGPU_MACH_R600_RV670
Definition: ELF.h:740
@ EF_AMDGPU_MACH_AMDGCN_GFX701
Definition: ELF.h:770
@ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
Definition: ELF.h:818
@ EF_AMDGPU_MACH_AMDGCN_GFX1012
Definition: ELF.h:788
@ EF_AMDGPU_MACH_AMDGCN_GFX1151
Definition: ELF.h:809
@ EF_AMDGPU_MACH_AMDGCN_GFX1030
Definition: ELF.h:789
@ EF_AMDGPU_MACH_R600_CEDAR
Definition: ELF.h:746
@ EF_AMDGPU_MACH_AMDGCN_GFX1200
Definition: ELF.h:807
@ EF_AMDGPU_MACH_AMDGCN_GFX700
Definition: ELF.h:769
@ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
Definition: ELF.h:819
@ EF_AMDGPU_MACH_AMDGCN_GFX803
Definition: ELF.h:777
@ EF_AMDGPU_MACH_AMDGCN_GFX802
Definition: ELF.h:776
@ EF_AMDGPU_MACH_AMDGCN_GFX90C
Definition: ELF.h:785
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
Definition: ELF.h:865
@ EF_AMDGPU_MACH_AMDGCN_GFX900
Definition: ELF.h:779
@ EF_AMDGPU_MACH_AMDGCN_GFX909
Definition: ELF.h:784
@ EF_AMDGPU_MACH_AMDGCN_GFX906
Definition: ELF.h:782
@ EF_AMDGPU_MACH_NONE
Definition: ELF.h:732
@ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
Definition: ELF.h:816
@ EF_AMDGPU_MACH_AMDGCN_GFX1103
Definition: ELF.h:803
@ EF_AMDGPU_MACH_R600_CAICOS
Definition: ELF.h:753
@ EF_AMDGPU_MACH_AMDGCN_GFX90A
Definition: ELF.h:798
@ EF_AMDGPU_MACH_AMDGCN_GFX1034
Definition: ELF.h:797
@ EF_AMDGPU_MACH_AMDGCN_GFX1013
Definition: ELF.h:801
@ EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC
Definition: ELF.h:824
@ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
Definition: ELF.h:817
@ EF_AMDGPU_MACH_AMDGCN_GFX904
Definition: ELF.h:781
@ EF_AMDGPU_MACH_R600_RS880
Definition: ELF.h:739
@ EF_AMDGPU_MACH_AMDGCN_GFX805
Definition: ELF.h:795
@ EF_AMDGPU_MACH_AMDGCN_GFX1201
Definition: ELF.h:813
@ EF_AMDGPU_MACH_AMDGCN_GFX1150
Definition: ELF.h:802
@ EF_AMDGPU_MACH_R600_SUMO
Definition: ELF.h:750
@ EF_AMDGPU_MACH_R600_BARTS
Definition: ELF.h:752
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
Definition: ELF.h:874
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
Definition: ELF.h:878
@ EF_AMDGPU_MACH_AMDGCN_GFX702
Definition: ELF.h:771
@ SHF_ALLOC
Definition: ELF.h:1188
@ STB_GLOBAL
Definition: ELF.h:1342
@ NT_AMDGPU_METADATA
Definition: ELF.h:1913
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1367
@ STT_OBJECT
Definition: ELF.h:1354
@ STV_PROTECTED
Definition: ELF.h:1374
@ STV_DEFAULT
Definition: ELF.h:1371
@ SHN_AMDGPU_LDS
Definition: ELF.h:1896
@ NT_AMD_HSA_ISA_NAME
Definition: ELF.h:1906
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
const char * toString(DWARFSectionKind Kind)
Instruction set architecture version.
Definition: TargetParser.h:130
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85