LLVM 20.0.0git
AMDGPUTargetStreamer.cpp
Go to the documentation of this file.
1//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
14#include "AMDGPUMCExpr.h"
16#include "AMDGPUPTNote.h"
21#include "llvm/MC/MCAssembler.h"
22#include "llvm/MC/MCContext.h"
32
33using namespace llvm;
34using namespace llvm::AMDGPU;
35
36//===----------------------------------------------------------------------===//
37// AMDGPUTargetStreamer
38//===----------------------------------------------------------------------===//
39
41 ForceGenericVersion("amdgpu-force-generic-version",
42 cl::desc("Force a specific generic_v<N> flag to be "
43 "added. For testing purposes only."),
45
47 msgpack::Document HSAMetadataDoc;
48 if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
49 return false;
50 return EmitHSAMetadata(HSAMetadataDoc, false);
51}
52
55
56 // clang-format off
57 switch (ElfMach) {
58 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
59 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
69 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
127 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
128 default: AK = GK_NONE; break;
129 }
130 // clang-format on
131
132 StringRef GPUName = getArchNameAMDGCN(AK);
133 if (GPUName != "")
134 return GPUName;
135 return getArchNameR600(AK);
136}
137
140 if (AK == AMDGPU::GPUKind::GK_NONE)
141 AK = parseArchR600(GPU);
142
143 // clang-format off
144 switch (AK) {
215 }
216 // clang-format on
217
218 llvm_unreachable("unknown GPU");
219}
220
221//===----------------------------------------------------------------------===//
222// AMDGPUTargetAsmStreamer
223//===----------------------------------------------------------------------===//
224
227 : AMDGPUTargetStreamer(S), OS(OS) { }
228
229// A hook for emitting stuff at the end.
230// We use it for emitting the accumulated PAL metadata as directives.
231// The PAL metadata is reset after it is emitted.
233 std::string S;
235 OS << S;
236
237 // Reset the pal metadata so its data will not affect a compilation that
238 // reuses this object.
240}
241
243 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
244}
245
247 unsigned COV) {
249 OS << "\t.amdhsa_code_object_version " << COV << '\n';
250}
251
253 auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
254 const MCAsmInfo *MAI) {
256 };
257
258 OS << "\t.amd_kernel_code_t\n";
259 Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);
260 OS << "\t.end_amd_kernel_code_t\n";
261}
262
264 unsigned Type) {
265 switch (Type) {
266 default: llvm_unreachable("Invalid AMDGPU symbol type");
268 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
269 break;
270 }
271}
272
274 Align Alignment) {
275 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
276 << Alignment.value() << '\n';
277}
278
280 const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
281 const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize,
282 const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch,
283 const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion,
284 const MCSymbol *HasIndirectCall) {
285#define PRINT_RES_INFO(ARG) \
286 OS << "\t.set "; \
287 ARG->print(OS, getContext().getAsmInfo()); \
288 OS << ", "; \
289 ARG->getVariableValue()->print(OS, getContext().getAsmInfo()); \
290 Streamer.addBlankLine();
291
292 PRINT_RES_INFO(NumVGPR);
293 PRINT_RES_INFO(NumAGPR);
294 PRINT_RES_INFO(NumExplicitSGPR);
295 PRINT_RES_INFO(PrivateSegmentSize);
296 PRINT_RES_INFO(UsesVCC);
297 PRINT_RES_INFO(UsesFlatScratch);
298 PRINT_RES_INFO(HasDynamicallySizedStack);
299 PRINT_RES_INFO(HasRecursion);
300 PRINT_RES_INFO(HasIndirectCall);
301#undef PRINT_RES_INFO
302}
303
305 const MCSymbol *MaxAGPR,
306 const MCSymbol *MaxSGPR) {
307#define PRINT_RES_INFO(ARG) \
308 OS << "\t.set "; \
309 ARG->print(OS, getContext().getAsmInfo()); \
310 OS << ", "; \
311 ARG->getVariableValue()->print(OS, getContext().getAsmInfo()); \
312 Streamer.addBlankLine();
313
314 PRINT_RES_INFO(MaxVGPR);
315 PRINT_RES_INFO(MaxAGPR);
316 PRINT_RES_INFO(MaxSGPR);
317#undef PRINT_RES_INFO
318}
319
321 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
322 return true;
323}
324
326 msgpack::Document &HSAMetadataDoc, bool Strict) {
328 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
329 return false;
330
331 std::string HSAMetadataString;
332 raw_string_ostream StrOS(HSAMetadataString);
333 HSAMetadataDoc.toYAML(StrOS);
334
335 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
336 OS << StrOS.str() << '\n';
337 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
338 return true;
339}
340
342 const uint32_t Encoded_s_code_end = 0xbf9f0000;
343 const uint32_t Encoded_s_nop = 0xbf800000;
344 uint32_t Encoded_pad = Encoded_s_code_end;
345
346 // Instruction cache line size in bytes.
347 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
348 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
349
350 // Extra padding amount in bytes to support prefetch mode 3.
351 unsigned FillSize = 3 * CacheLineSize;
352
353 if (AMDGPU::isGFX90A(STI)) {
354 Encoded_pad = Encoded_s_nop;
355 FillSize = 16 * CacheLineSize;
356 }
357
358 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
359 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
360 return true;
361}
362
364 const MCSubtargetInfo &STI, StringRef KernelName,
365 const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
366 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
367 const MCExpr *ReserveFlatScr) {
368 IsaVersion IVersion = getIsaVersion(STI.getCPU());
369 const MCAsmInfo *MAI = getContext().getAsmInfo();
370
371 OS << "\t.amdhsa_kernel " << KernelName << '\n';
372
373 auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
375 OS << "\t\t" << Directive << ' ';
376 const MCExpr *ShiftedAndMaskedExpr =
377 MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
378 const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext());
379 printAMDGPUMCExpr(New, OS, MAI);
380 OS << '\n';
381 };
382
383 auto EmitMCExpr = [&](const MCExpr *Value) {
385 printAMDGPUMCExpr(NewExpr, OS, MAI);
386 };
387
388 OS << "\t\t.amdhsa_group_segment_fixed_size ";
389 EmitMCExpr(KD.group_segment_fixed_size);
390 OS << '\n';
391
392 OS << "\t\t.amdhsa_private_segment_fixed_size ";
393 EmitMCExpr(KD.private_segment_fixed_size);
394 OS << '\n';
395
396 OS << "\t\t.amdhsa_kernarg_size ";
397 EmitMCExpr(KD.kernarg_size);
398 OS << '\n';
399
401 KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
402 amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
403
407 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
408 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
409 ".amdhsa_user_sgpr_private_segment_buffer");
411 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
412 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
413 ".amdhsa_user_sgpr_dispatch_ptr");
415 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
416 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
417 ".amdhsa_user_sgpr_queue_ptr");
419 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
420 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
421 ".amdhsa_user_sgpr_kernarg_segment_ptr");
423 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
424 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
425 ".amdhsa_user_sgpr_dispatch_id");
428 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
429 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
430 ".amdhsa_user_sgpr_flat_scratch_init");
431 if (hasKernargPreload(STI)) {
432 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
433 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
434 ".amdhsa_user_sgpr_kernarg_preload_length");
435 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
436 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
437 ".amdhsa_user_sgpr_kernarg_preload_offset");
438 }
441 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
442 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
443 ".amdhsa_user_sgpr_private_segment_size");
444 if (IVersion.Major >= 10)
446 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
447 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
448 ".amdhsa_wavefront_size32");
451 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
452 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
453 ".amdhsa_uses_dynamic_stack");
455 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
456 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
458 ? ".amdhsa_enable_private_segment"
459 : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
461 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
462 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
463 ".amdhsa_system_sgpr_workgroup_id_x");
465 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
466 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
467 ".amdhsa_system_sgpr_workgroup_id_y");
469 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
470 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
471 ".amdhsa_system_sgpr_workgroup_id_z");
473 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
474 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
475 ".amdhsa_system_sgpr_workgroup_info");
477 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
478 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
479 ".amdhsa_system_vgpr_workitem_id");
480
481 // These directives are required.
482 OS << "\t\t.amdhsa_next_free_vgpr ";
483 EmitMCExpr(NextVGPR);
484 OS << '\n';
485
486 OS << "\t\t.amdhsa_next_free_sgpr ";
487 EmitMCExpr(NextSGPR);
488 OS << '\n';
489
490 if (AMDGPU::isGFX90A(STI)) {
491 // MCExpr equivalent of taking the (accum_offset + 1) * 4.
492 const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
494 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
495 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
496 accum_bits = MCBinaryExpr::createAdd(
497 accum_bits, MCConstantExpr::create(1, getContext()), getContext());
498 accum_bits = MCBinaryExpr::createMul(
499 accum_bits, MCConstantExpr::create(4, getContext()), getContext());
500 OS << "\t\t.amdhsa_accum_offset ";
501 const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext());
502 printAMDGPUMCExpr(New, OS, MAI);
503 OS << '\n';
504 }
505
506 OS << "\t\t.amdhsa_reserve_vcc ";
507 EmitMCExpr(ReserveVCC);
508 OS << '\n';
509
510 if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
511 OS << "\t\t.amdhsa_reserve_flat_scratch ";
512 EmitMCExpr(ReserveFlatScr);
513 OS << '\n';
514 }
515
516 switch (CodeObjectVersion) {
517 default:
518 break;
521 if (getTargetID()->isXnackSupported())
522 OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
523 break;
524 }
525
527 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
528 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
529 ".amdhsa_float_round_mode_32");
531 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
532 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
533 ".amdhsa_float_round_mode_16_64");
535 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
536 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
537 ".amdhsa_float_denorm_mode_32");
539 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
540 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
541 ".amdhsa_float_denorm_mode_16_64");
542 if (IVersion.Major < 12) {
544 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
545 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
546 ".amdhsa_dx10_clamp");
548 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
549 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
550 ".amdhsa_ieee_mode");
551 }
552 if (IVersion.Major >= 9) {
554 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
555 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
556 ".amdhsa_fp16_overflow");
557 }
558 if (AMDGPU::isGFX90A(STI))
560 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
561 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
562 if (IVersion.Major >= 10) {
564 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
565 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
566 ".amdhsa_workgroup_processor_mode");
568 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
569 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
570 ".amdhsa_memory_ordered");
572 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
573 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
574 ".amdhsa_forward_progress");
575 }
576 if (IVersion.Major >= 10 && IVersion.Major < 12) {
578 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
579 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
580 ".amdhsa_shared_vgpr_count");
581 }
582 if (IVersion.Major >= 12) {
584 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
585 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
586 ".amdhsa_round_robin_scheduling");
587 }
590 amdhsa::
591 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
592 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
593 ".amdhsa_exception_fp_ieee_invalid_op");
596 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
597 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
598 ".amdhsa_exception_fp_denorm_src");
601 amdhsa::
602 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
603 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
604 ".amdhsa_exception_fp_ieee_div_zero");
607 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
608 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
609 ".amdhsa_exception_fp_ieee_overflow");
612 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
613 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
614 ".amdhsa_exception_fp_ieee_underflow");
617 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
618 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
619 ".amdhsa_exception_fp_ieee_inexact");
622 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
623 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
624 ".amdhsa_exception_int_div_zero");
625
626 OS << "\t.end_amdhsa_kernel\n";
627}
628
629//===----------------------------------------------------------------------===//
630// AMDGPUTargetELFStreamer
631//===----------------------------------------------------------------------===//
632
634 const MCSubtargetInfo &STI)
635 : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
636
638 return static_cast<MCELFStreamer &>(Streamer);
639}
640
641// A hook for emitting stuff at the end.
642// We use it for emitting the accumulated PAL metadata as a .note record.
643// The PAL metadata is reset after it is emitted.
646 W.setELFHeaderEFlags(getEFlags());
647 W.setOverrideABIVersion(
649
650 std::string Blob;
651 const char *Vendor = getPALMetadata()->getVendor();
652 unsigned Type = getPALMetadata()->getType();
653 getPALMetadata()->toBlob(Type, Blob);
654 if (Blob.empty())
655 return;
656 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
657 [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
658
659 // Reset the pal metadata so its data will not affect a compilation that
660 // reuses this object.
662}
663
664void AMDGPUTargetELFStreamer::EmitNote(
665 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
666 function_ref<void(MCELFStreamer &)> EmitDesc) {
667 auto &S = getStreamer();
668 auto &Context = S.getContext();
669
670 auto NameSZ = Name.size() + 1;
671
672 unsigned NoteFlags = 0;
673 // TODO Apparently, this is currently needed for OpenCL as mentioned in
674 // https://reviews.llvm.org/D74995
675 if (isHsaAbi(STI))
676 NoteFlags = ELF::SHF_ALLOC;
677
678 S.pushSection();
679 S.switchSection(
680 Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
681 S.emitInt32(NameSZ); // namesz
682 S.emitValue(DescSZ, 4); // descz
683 S.emitInt32(NoteType); // type
684 S.emitBytes(Name); // name
685 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
686 EmitDesc(S); // desc
687 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
688 S.popSection();
689}
690
691unsigned AMDGPUTargetELFStreamer::getEFlags() {
692 switch (STI.getTargetTriple().getArch()) {
693 default:
694 llvm_unreachable("Unsupported Arch");
695 case Triple::r600:
696 return getEFlagsR600();
697 case Triple::amdgcn:
698 return getEFlagsAMDGCN();
699 }
700}
701
702unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
704
705 return getElfMach(STI.getCPU());
706}
707
708unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
710
711 switch (STI.getTargetTriple().getOS()) {
712 default:
713 // TODO: Why do some tests have "mingw" listed as OS?
714 // llvm_unreachable("Unsupported OS");
716 return getEFlagsUnknownOS();
717 case Triple::AMDHSA:
718 return getEFlagsAMDHSA();
719 case Triple::AMDPAL:
720 return getEFlagsAMDPAL();
721 case Triple::Mesa3D:
722 return getEFlagsMesa3D();
723 }
724}
725
726unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
727 // TODO: Why are some tests have "mingw" listed as OS?
728 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
729
730 return getEFlagsV3();
731}
732
733unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
734 assert(isHsaAbi(STI));
735
736 if (CodeObjectVersion >= 6)
737 return getEFlagsV6();
738 return getEFlagsV4();
739}
740
741unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
743
744 return getEFlagsV3();
745}
746
747unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
749
750 return getEFlagsV3();
751}
752
753unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
754 unsigned EFlagsV3 = 0;
755
756 // mach.
757 EFlagsV3 |= getElfMach(STI.getCPU());
758
759 // xnack.
760 if (getTargetID()->isXnackOnOrAny())
762 // sramecc.
763 if (getTargetID()->isSramEccOnOrAny())
765
766 return EFlagsV3;
767}
768
769unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
770 unsigned EFlagsV4 = 0;
771
772 // mach.
773 EFlagsV4 |= getElfMach(STI.getCPU());
774
775 // xnack.
776 switch (getTargetID()->getXnackSetting()) {
779 break;
782 break;
785 break;
788 break;
789 }
790 // sramecc.
791 switch (getTargetID()->getSramEccSetting()) {
794 break;
797 break;
800 break;
803 break;
804 }
805
806 return EFlagsV4;
807}
808
809unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
810 unsigned Flags = getEFlagsV4();
811
812 unsigned Version = ForceGenericVersion;
813 if (!Version) {
814 switch (parseArchAMDGCN(STI.getCPU())) {
817 break;
820 break;
823 break;
826 break;
829 break;
832 break;
833 default:
834 break;
835 }
836 }
837
838 // Versions start at 1.
839 if (Version) {
841 report_fatal_error("Cannot encode generic code object version " +
842 Twine(Version) +
843 " - no ELF flag can represent this version!");
845 }
846
847 return Flags;
848}
849
851
854 OS.pushSection();
855 Header.EmitKernelCodeT(OS, getContext());
856 OS.popSection();
857}
858
860 unsigned Type) {
861 MCSymbolELF *Symbol = cast<MCSymbolELF>(
862 getStreamer().getContext().getOrCreateSymbol(SymbolName));
863 Symbol->setType(Type);
864}
865
867 Align Alignment) {
868 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
869 SymbolELF->setType(ELF::STT_OBJECT);
870
871 if (!SymbolELF->isBindingSet())
872 SymbolELF->setBinding(ELF::STB_GLOBAL);
873
874 if (SymbolELF->declareCommon(Size, Alignment, true)) {
875 report_fatal_error("Symbol: " + Symbol->getName() +
876 " redeclared as different type");
877 }
878
879 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
881}
882
884 // Create two labels to mark the beginning and end of the desc field
885 // and a MCExpr to calculate the size of the desc field.
886 auto &Context = getContext();
887 auto *DescBegin = Context.createTempSymbol();
888 auto *DescEnd = Context.createTempSymbol();
889 auto *DescSZ = MCBinaryExpr::createSub(
890 MCSymbolRefExpr::create(DescEnd, Context),
891 MCSymbolRefExpr::create(DescBegin, Context), Context);
892
894 [&](MCELFStreamer &OS) {
895 OS.emitLabel(DescBegin);
896 OS.emitBytes(getTargetID()->toString());
897 OS.emitLabel(DescEnd);
898 });
899 return true;
900}
901
903 bool Strict) {
905 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
906 return false;
907
908 std::string HSAMetadataString;
909 HSAMetadataDoc.writeToBlob(HSAMetadataString);
910
911 // Create two labels to mark the beginning and end of the desc field
912 // and a MCExpr to calculate the size of the desc field.
913 auto &Context = getContext();
914 auto *DescBegin = Context.createTempSymbol();
915 auto *DescEnd = Context.createTempSymbol();
916 auto *DescSZ = MCBinaryExpr::createSub(
917 MCSymbolRefExpr::create(DescEnd, Context),
918 MCSymbolRefExpr::create(DescBegin, Context), Context);
919
921 [&](MCELFStreamer &OS) {
922 OS.emitLabel(DescBegin);
923 OS.emitBytes(HSAMetadataString);
924 OS.emitLabel(DescEnd);
925 });
926 return true;
927}
928
930 const uint32_t Encoded_s_code_end = 0xbf9f0000;
931 const uint32_t Encoded_s_nop = 0xbf800000;
932 uint32_t Encoded_pad = Encoded_s_code_end;
933
934 // Instruction cache line size in bytes.
935 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
936 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
937
938 // Extra padding amount in bytes to support prefetch mode 3.
939 unsigned FillSize = 3 * CacheLineSize;
940
941 if (AMDGPU::isGFX90A(STI)) {
942 Encoded_pad = Encoded_s_nop;
943 FillSize = 16 * CacheLineSize;
944 }
945
947 OS.pushSection();
948 OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
949 for (unsigned I = 0; I < FillSize; I += 4)
950 OS.emitInt32(Encoded_pad);
951 OS.popSection();
952 return true;
953}
954
956 const MCSubtargetInfo &STI, StringRef KernelName,
957 const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
958 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
959 const MCExpr *ReserveFlatScr) {
960 auto &Streamer = getStreamer();
961 auto &Context = Streamer.getContext();
962
963 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
964 Context.getOrCreateSymbol(Twine(KernelName)));
965 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
966 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
967
968 // Copy kernel descriptor symbol's binding, other and visibility from the
969 // kernel code symbol.
970 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
971 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
972 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
973 // Kernel descriptor symbol's type and size are fixed.
974 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
975 KernelDescriptorSymbol->setSize(
977
978 // The visibility of the kernel code symbol must be protected or less to allow
979 // static relocations from the kernel descriptor to be used.
980 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
981 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
982
983 Streamer.emitLabel(KernelDescriptorSymbol);
984 Streamer.emitValue(
985 KernelDescriptor.group_segment_fixed_size,
987 Streamer.emitValue(
988 KernelDescriptor.private_segment_fixed_size,
990 Streamer.emitValue(KernelDescriptor.kernarg_size,
992
993 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
994 Streamer.emitInt8(0u);
995
996 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
997 // expression being created is:
998 // (start of kernel code) - (start of kernel descriptor)
999 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
1000 Streamer.emitValue(
1002 MCSymbolRefExpr::create(KernelCodeSymbol,
1004 MCSymbolRefExpr::create(KernelDescriptorSymbol,
1005 MCSymbolRefExpr::VK_None, Context),
1006 Context),
1008 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
1009 Streamer.emitInt8(0u);
1010 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
1012 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
1014 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
1016 Streamer.emitValue(
1017 KernelDescriptor.kernel_code_properties,
1019 Streamer.emitValue(KernelDescriptor.kernarg_preload,
1021 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
1022 Streamer.emitInt8(0u);
1023}
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
This is a verifier for AMDGPU HSA metadata, which can verify both well-typed metadata and untyped met...
AMDGPU metadata definitions and in-memory representations.
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
#define PRINT_RES_INFO(ARG)
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
std::string Name
uint64_t Size
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
verify safepoint Safepoint IR Verifier
raw_pwrite_stream & OS
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
const char * getVendor() const
void toBlob(unsigned Type, std::string &S)
void toString(std::string &S)
void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) override
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR) override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:537
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:592
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:622
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:222
const MCAsmInfo * getAsmInfo() const
Definition: MCContext.h:412
ELFObjectWriter & getWriter()
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Streaming machine code generation interface.
Definition: MCStreamer.h:213
MCContext & getContext() const
Definition: MCStreamer.h:300
void emitValue(const MCExpr *Value, unsigned Size, SMLoc Loc=SMLoc())
Definition: MCStreamer.cpp:179
virtual void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc())
Emit a label for Symbol into the current section.
Definition: MCStreamer.cpp:420
void emitInt8(uint64_t Value)
Definition: MCStreamer.h:727
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
StringRef getCPU() const
unsigned getOther() const
void setVisibility(unsigned Visibility)
void setSize(const MCExpr *SS)
Definition: MCSymbolELF.h:23
bool isBindingSet() const
void setBinding(unsigned Binding) const
Definition: MCSymbolELF.cpp:43
unsigned getVisibility() const
unsigned getBinding() const
Definition: MCSymbolELF.cpp:66
void setType(unsigned Type) const
Definition: MCSymbolELF.cpp:94
void setOther(unsigned Other)
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void setIndex(uint32_t Value) const
Set the (implementation defined) index.
Definition: MCSymbol.h:321
bool declareCommon(uint64_t Size, Align Alignment, bool Target=false)
Declare this symbol as being 'common'.
Definition: MCSymbol.h:375
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:392
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:383
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
formatted_raw_ostream - A raw_ostream that wraps another one and keeps track of line and column posit...
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:679
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char NoteNameV2[]
Definition: AMDGPUPTNote.h:26
const char SectionName[]
Definition: AMDGPUPTNote.h:24
const char NoteNameV3[]
Definition: AMDGPUPTNote.h:27
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
Definition: TargetParser.h:35
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
GPUKind parseArchR600(StringRef CPU)
@ SHT_NOTE
Definition: ELF.h:1104
@ EF_AMDGPU_GENERIC_VERSION_MAX
Definition: ELF.h:892
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
Definition: ELF.h:869
@ EF_AMDGPU_MACH_AMDGCN_GFX703
Definition: ELF.h:780
@ EF_AMDGPU_MACH_AMDGCN_GFX1035
Definition: ELF.h:804
@ EF_AMDGPU_FEATURE_SRAMECC_V3
Definition: ELF.h:860
@ EF_AMDGPU_MACH_AMDGCN_GFX1031
Definition: ELF.h:798
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
Definition: ELF.h:890
@ EF_AMDGPU_MACH_R600_CAYMAN
Definition: ELF.h:762
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
Definition: ELF.h:880
@ EF_AMDGPU_MACH_AMDGCN_GFX704
Definition: ELF.h:781
@ EF_AMDGPU_MACH_AMDGCN_GFX902
Definition: ELF.h:788
@ EF_AMDGPU_MACH_AMDGCN_GFX810
Definition: ELF.h:786
@ EF_AMDGPU_MACH_AMDGCN_GFX950
Definition: ELF.h:822
@ EF_AMDGPU_MACH_AMDGCN_GFX1036
Definition: ELF.h:812
@ EF_AMDGPU_MACH_AMDGCN_GFX1102
Definition: ELF.h:814
@ EF_AMDGPU_MACH_R600_RV730
Definition: ELF.h:751
@ EF_AMDGPU_MACH_R600_RV710
Definition: ELF.h:750
@ EF_AMDGPU_MACH_AMDGCN_GFX908
Definition: ELF.h:791
@ EF_AMDGPU_MACH_AMDGCN_GFX1011
Definition: ELF.h:795
@ EF_AMDGPU_MACH_R600_CYPRESS
Definition: ELF.h:755
@ EF_AMDGPU_MACH_AMDGCN_GFX1032
Definition: ELF.h:799
@ EF_AMDGPU_MACH_R600_R600
Definition: ELF.h:745
@ EF_AMDGPU_MACH_AMDGCN_GFX940
Definition: ELF.h:807
@ EF_AMDGPU_MACH_AMDGCN_GFX941
Definition: ELF.h:818
@ EF_AMDGPU_MACH_R600_TURKS
Definition: ELF.h:763
@ EF_AMDGPU_MACH_R600_JUNIPER
Definition: ELF.h:756
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
Definition: ELF.h:884
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
Definition: ELF.h:867
@ EF_AMDGPU_MACH_AMDGCN_GFX601
Definition: ELF.h:776
@ EF_AMDGPU_MACH_AMDGCN_GFX942
Definition: ELF.h:819
@ EF_AMDGPU_MACH_AMDGCN_GFX1152
Definition: ELF.h:828
@ EF_AMDGPU_MACH_R600_R630
Definition: ELF.h:746
@ EF_AMDGPU_MACH_R600_REDWOOD
Definition: ELF.h:757
@ EF_AMDGPU_MACH_R600_RV770
Definition: ELF.h:752
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
Definition: ELF.h:871
@ EF_AMDGPU_MACH_AMDGCN_GFX600
Definition: ELF.h:775
@ EF_AMDGPU_FEATURE_XNACK_V3
Definition: ELF.h:855
@ EF_AMDGPU_MACH_AMDGCN_GFX602
Definition: ELF.h:801
@ EF_AMDGPU_MACH_AMDGCN_GFX1101
Definition: ELF.h:813
@ EF_AMDGPU_MACH_AMDGCN_GFX1100
Definition: ELF.h:808
@ EF_AMDGPU_MACH_AMDGCN_GFX1033
Definition: ELF.h:800
@ EF_AMDGPU_MACH_AMDGCN_GFX801
Definition: ELF.h:783
@ EF_AMDGPU_MACH_AMDGCN_GFX705
Definition: ELF.h:802
@ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC
Definition: ELF.h:833
@ EF_AMDGPU_MACH_AMDGCN_GFX1153
Definition: ELF.h:831
@ EF_AMDGPU_MACH_AMDGCN_GFX1010
Definition: ELF.h:794
@ EF_AMDGPU_MACH_R600_RV670
Definition: ELF.h:748
@ EF_AMDGPU_MACH_AMDGCN_GFX701
Definition: ELF.h:778
@ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
Definition: ELF.h:826
@ EF_AMDGPU_MACH_AMDGCN_GFX1012
Definition: ELF.h:796
@ EF_AMDGPU_MACH_AMDGCN_GFX1151
Definition: ELF.h:817
@ EF_AMDGPU_MACH_AMDGCN_GFX1030
Definition: ELF.h:797
@ EF_AMDGPU_MACH_R600_CEDAR
Definition: ELF.h:754
@ EF_AMDGPU_MACH_AMDGCN_GFX1200
Definition: ELF.h:815
@ EF_AMDGPU_MACH_AMDGCN_GFX700
Definition: ELF.h:777
@ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
Definition: ELF.h:827
@ EF_AMDGPU_MACH_AMDGCN_GFX803
Definition: ELF.h:785
@ EF_AMDGPU_MACH_AMDGCN_GFX802
Definition: ELF.h:784
@ EF_AMDGPU_MACH_AMDGCN_GFX90C
Definition: ELF.h:793
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
Definition: ELF.h:873
@ EF_AMDGPU_MACH_AMDGCN_GFX900
Definition: ELF.h:787
@ EF_AMDGPU_MACH_AMDGCN_GFX909
Definition: ELF.h:792
@ EF_AMDGPU_MACH_AMDGCN_GFX906
Definition: ELF.h:790
@ EF_AMDGPU_MACH_NONE
Definition: ELF.h:740
@ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
Definition: ELF.h:824
@ EF_AMDGPU_MACH_AMDGCN_GFX1103
Definition: ELF.h:811
@ EF_AMDGPU_MACH_R600_CAICOS
Definition: ELF.h:761
@ EF_AMDGPU_MACH_AMDGCN_GFX90A
Definition: ELF.h:806
@ EF_AMDGPU_MACH_AMDGCN_GFX1034
Definition: ELF.h:805
@ EF_AMDGPU_MACH_AMDGCN_GFX1013
Definition: ELF.h:809
@ EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC
Definition: ELF.h:832
@ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
Definition: ELF.h:825
@ EF_AMDGPU_MACH_AMDGCN_GFX904
Definition: ELF.h:789
@ EF_AMDGPU_MACH_R600_RS880
Definition: ELF.h:747
@ EF_AMDGPU_MACH_AMDGCN_GFX805
Definition: ELF.h:803
@ EF_AMDGPU_MACH_AMDGCN_GFX1201
Definition: ELF.h:821
@ EF_AMDGPU_MACH_AMDGCN_GFX1150
Definition: ELF.h:810
@ EF_AMDGPU_MACH_R600_SUMO
Definition: ELF.h:758
@ EF_AMDGPU_MACH_R600_BARTS
Definition: ELF.h:760
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
Definition: ELF.h:882
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
Definition: ELF.h:886
@ EF_AMDGPU_MACH_AMDGCN_GFX702
Definition: ELF.h:779
@ SHF_ALLOC
Definition: ELF.h:1196
@ STB_GLOBAL
Definition: ELF.h:1350
@ NT_AMDGPU_METADATA
Definition: ELF.h:1921
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1375
@ STT_OBJECT
Definition: ELF.h:1362
@ STV_PROTECTED
Definition: ELF.h:1382
@ STV_DEFAULT
Definition: ELF.h:1379
@ SHN_AMDGPU_LDS
Definition: ELF.h:1904
@ NT_AMD_HSA_ISA_NAME
Definition: ELF.h:1914
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
const char * toString(DWARFSectionKind Kind)
Instruction set architecture version.
Definition: TargetParser.h:130
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85