43 cl::desc(
"Force a specific generic_v<N> flag to be "
44 "added. For testing purposes only."),
49 if (!HSAMetadataDoc.
fromYAML(HSAMetadataString))
238 OS <<
"\t.amdgcn_target \"" <<
getTargetID()->toString() <<
"\"\n";
244 OS <<
"\t.amdhsa_code_object_version " << COV <<
'\n';
253 OS <<
"\t.amd_kernel_code_t\n";
255 OS <<
"\t.end_amd_kernel_code_t\n";
263 OS <<
"\t.amdgpu_hsa_kernel " << SymbolName <<
'\n' ;
270 OS <<
"\t.amdgpu_lds " << Symbol->getName() <<
", " <<
Size <<
", "
271 << Alignment.
value() <<
'\n';
275 OS <<
"\t.amd_amdgpu_isa \"" <<
getTargetID()->toString() <<
"\"\n";
285 std::string HSAMetadataString;
287 HSAMetadataDoc.
toYAML(StrOS);
290 OS << StrOS.
str() <<
'\n';
297 OS << (TrapEnabled ?
"\ts_trap 2" :
"\ts_endpgm")
298 <<
" ; Kernarg preload header. Trap with incompatible firmware that "
299 "doesn't support preloading kernel arguments.\n";
300 OS <<
"\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
305 const uint32_t Encoded_s_code_end = 0xbf9f0000;
306 const uint32_t Encoded_s_nop = 0xbf800000;
307 uint32_t Encoded_pad = Encoded_s_code_end;
317 Encoded_pad = Encoded_s_nop;
321 OS <<
"\t.p2alignl " << Log2CacheLineSize <<
", " << Encoded_pad <<
'\n';
322 OS <<
"\t.fill " << (FillSize / 4) <<
", 4, " << Encoded_pad <<
'\n';
330 const MCExpr *ReserveFlatScr) {
334 OS <<
"\t.amdhsa_kernel " << KernelName <<
'\n';
339 const MCExpr *ShiftedAndMaskedExpr =
351 OS <<
"\t\t.amdhsa_group_segment_fixed_size ";
355 OS <<
"\t\t.amdhsa_private_segment_fixed_size ";
359 OS <<
"\t\t.amdhsa_kernarg_size ";
365 amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
".amdhsa_user_sgpr_count");
370 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
371 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
372 ".amdhsa_user_sgpr_private_segment_buffer");
374 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
375 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
376 ".amdhsa_user_sgpr_dispatch_ptr");
378 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
379 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
380 ".amdhsa_user_sgpr_queue_ptr");
382 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
383 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
384 ".amdhsa_user_sgpr_kernarg_segment_ptr");
386 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
387 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
388 ".amdhsa_user_sgpr_dispatch_id");
391 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
392 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
393 ".amdhsa_user_sgpr_flat_scratch_init");
396 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
397 ".amdhsa_user_sgpr_kernarg_preload_length");
399 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
400 ".amdhsa_user_sgpr_kernarg_preload_offset");
404 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
405 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
406 ".amdhsa_user_sgpr_private_segment_size");
407 if (IVersion.
Major >= 10)
409 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
410 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
411 ".amdhsa_wavefront_size32");
414 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
415 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
416 ".amdhsa_uses_dynamic_stack");
418 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
419 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
421 ?
".amdhsa_enable_private_segment"
422 :
".amdhsa_system_sgpr_private_segment_wavefront_offset"));
424 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
425 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
426 ".amdhsa_system_sgpr_workgroup_id_x");
428 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
429 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
430 ".amdhsa_system_sgpr_workgroup_id_y");
432 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
433 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
434 ".amdhsa_system_sgpr_workgroup_id_z");
436 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
437 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
438 ".amdhsa_system_sgpr_workgroup_info");
440 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
441 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
442 ".amdhsa_system_vgpr_workitem_id");
445 OS <<
"\t\t.amdhsa_next_free_vgpr ";
446 EmitMCExpr(NextVGPR);
449 OS <<
"\t\t.amdhsa_next_free_sgpr ";
450 EmitMCExpr(NextSGPR);
457 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
458 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
getContext());
463 OS <<
"\t\t.amdhsa_accum_offset ";
469 OS <<
"\t\t.amdhsa_reserve_vcc ";
470 EmitMCExpr(ReserveVCC);
474 OS <<
"\t\t.amdhsa_reserve_flat_scratch ";
475 EmitMCExpr(ReserveFlatScr);
485 OS <<
"\t\t.amdhsa_reserve_xnack_mask " <<
getTargetID()->isXnackOnOrAny() <<
'\n';
490 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
491 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
492 ".amdhsa_float_round_mode_32");
494 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
495 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
496 ".amdhsa_float_round_mode_16_64");
498 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
499 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
500 ".amdhsa_float_denorm_mode_32");
502 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
503 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
504 ".amdhsa_float_denorm_mode_16_64");
505 if (IVersion.
Major < 12) {
507 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
508 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
509 ".amdhsa_dx10_clamp");
511 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
512 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
513 ".amdhsa_ieee_mode");
515 if (IVersion.
Major >= 9) {
517 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
518 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
519 ".amdhsa_fp16_overflow");
523 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
524 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
".amdhsa_tg_split");
525 if (IVersion.
Major >= 10) {
527 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
528 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
529 ".amdhsa_workgroup_processor_mode");
531 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
532 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
533 ".amdhsa_memory_ordered");
535 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
536 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
537 ".amdhsa_forward_progress");
539 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
541 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
542 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
543 ".amdhsa_shared_vgpr_count");
545 if (IVersion.
Major >= 12) {
547 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
548 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
549 ".amdhsa_round_robin_scheduling");
554 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
555 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
556 ".amdhsa_exception_fp_ieee_invalid_op");
559 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
560 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
561 ".amdhsa_exception_fp_denorm_src");
565 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
566 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
567 ".amdhsa_exception_fp_ieee_div_zero");
570 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
571 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
572 ".amdhsa_exception_fp_ieee_overflow");
575 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
576 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
577 ".amdhsa_exception_fp_ieee_underflow");
580 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
581 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
582 ".amdhsa_exception_fp_ieee_inexact");
585 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
586 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
587 ".amdhsa_exception_int_div_zero");
589 OS <<
"\t.end_amdhsa_kernel\n";
609 W.setELFHeaderEFlags(getEFlags());
610 W.setOverrideABIVersion(
627void AMDGPUTargetELFStreamer::EmitNote(
631 auto &Context = S.getContext();
633 auto NameSZ =
Name.size() + 1;
635 unsigned NoteFlags = 0;
645 S.emitValue(DescSZ, 4);
646 S.emitInt32(NoteType);
648 S.emitValueToAlignment(
Align(4), 0, 1, 0);
650 S.emitValueToAlignment(
Align(4), 0, 1, 0);
654unsigned AMDGPUTargetELFStreamer::getEFlags() {
659 return getEFlagsR600();
661 return getEFlagsAMDGCN();
665unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
671unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
679 return getEFlagsUnknownOS();
681 return getEFlagsAMDHSA();
683 return getEFlagsAMDPAL();
685 return getEFlagsMesa3D();
689unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
693 return getEFlagsV3();
696unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
700 return getEFlagsV6();
701 return getEFlagsV4();
704unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
707 return getEFlagsV3();
710unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
713 return getEFlagsV3();
716unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
717 unsigned EFlagsV3 = 0;
732unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
733 unsigned EFlagsV4 = 0;
772unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
773 unsigned Flags = getEFlagsV4();
803 " - no ELF flag can represent this version!");
823 Symbol->setType(
Type);
828 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
836 " redeclared as different type");
847 auto *DescBegin = Context.createTempSymbol();
848 auto *DescEnd = Context.createTempSymbol();
855 OS.emitLabel(DescBegin);
857 OS.emitLabel(DescEnd);
868 std::string HSAMetadataString;
874 auto *DescBegin = Context.createTempSymbol();
875 auto *DescEnd = Context.createTempSymbol();
882 OS.emitLabel(DescBegin);
883 OS.emitBytes(HSAMetadataString);
884 OS.emitLabel(DescEnd);
891 const uint32_t Encoded_s_nop = 0xbf800000;
892 const uint32_t Encoded_s_trap = 0xbf920002;
893 const uint32_t Encoded_s_endpgm = 0xbf810000;
894 const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm;
896 OS.emitInt32(TrapInstr);
897 for (
int i = 0; i < 63; ++i) {
898 OS.emitInt32(Encoded_s_nop);
904 const uint32_t Encoded_s_code_end = 0xbf9f0000;
905 const uint32_t Encoded_s_nop = 0xbf800000;
906 uint32_t Encoded_pad = Encoded_s_code_end;
916 Encoded_pad = Encoded_s_nop;
923 for (
unsigned I = 0;
I < FillSize;
I += 4)
924 OS.emitInt32(Encoded_pad);
933 const MCExpr *ReserveFlatScr) {
938 Context.getOrCreateSymbol(
Twine(KernelName)));
939 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
940 Context.getOrCreateSymbol(
Twine(KernelName) +
Twine(
".kd")));
949 KernelDescriptorSymbol->
setSize(
957 Streamer.
emitLabel(KernelDescriptorSymbol);
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
verify safepoint Safepoint IR Verifier
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitDirectiveAMDGCNTarget() override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitDirectiveAMDGCNTarget() override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
MCELFStreamer & getStreamer()
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override
bool EmitISAVersion() override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
unsigned CodeObjectVersion
This class is intended to be used as a base class for asm properties and features specific to the tar...
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
const MCAsmInfo * getAsmInfo() const
ELFObjectWriter & getWriter()
Base class for the full range of assembler expressions which are needed for parsing.
Streaming machine code generation interface.
MCContext & getContext() const
void emitValue(const MCExpr *Value, unsigned Size, SMLoc Loc=SMLoc())
virtual void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc())
Emit a label for Symbol into the current section.
void emitInt8(uint64_t Value)
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
unsigned getOther() const
void setVisibility(unsigned Visibility)
void setSize(const MCExpr *SS)
bool isBindingSet() const
void setBinding(unsigned Binding) const
unsigned getVisibility() const
unsigned getBinding() const
void setType(unsigned Type) const
void setOther(unsigned Other)
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setIndex(uint32_t Value) const
Set the (implementation defined) index.
bool declareCommon(uint64_t Size, Align Alignment, bool Target=false)
Declare this symbol as being 'common'.
StringRef - Represent a constant reference to a string, i.e.
OSType getOS() const
Get the parsed operating system type of this triple.
ArchType getArch() const
Get the parsed architecture type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
GPUKind parseArchR600(StringRef CPU)
@ EF_AMDGPU_GENERIC_VERSION_MAX
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX703
@ EF_AMDGPU_MACH_AMDGCN_GFX1035
@ EF_AMDGPU_FEATURE_SRAMECC_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX1031
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
@ EF_AMDGPU_MACH_R600_CAYMAN
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX704
@ EF_AMDGPU_MACH_AMDGCN_GFX902
@ EF_AMDGPU_MACH_AMDGCN_GFX810
@ EF_AMDGPU_MACH_AMDGCN_GFX1036
@ EF_AMDGPU_MACH_AMDGCN_GFX1102
@ EF_AMDGPU_MACH_R600_RV730
@ EF_AMDGPU_MACH_R600_RV710
@ EF_AMDGPU_MACH_AMDGCN_GFX908
@ EF_AMDGPU_MACH_AMDGCN_GFX1011
@ EF_AMDGPU_MACH_R600_CYPRESS
@ EF_AMDGPU_MACH_AMDGCN_GFX1032
@ EF_AMDGPU_MACH_R600_R600
@ EF_AMDGPU_MACH_AMDGCN_GFX940
@ EF_AMDGPU_MACH_AMDGCN_GFX941
@ EF_AMDGPU_MACH_R600_TURKS
@ EF_AMDGPU_MACH_R600_JUNIPER
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX601
@ EF_AMDGPU_MACH_AMDGCN_GFX942
@ EF_AMDGPU_MACH_AMDGCN_GFX1152
@ EF_AMDGPU_MACH_R600_R630
@ EF_AMDGPU_MACH_R600_REDWOOD
@ EF_AMDGPU_MACH_R600_RV770
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX600
@ EF_AMDGPU_FEATURE_XNACK_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX602
@ EF_AMDGPU_MACH_AMDGCN_GFX1101
@ EF_AMDGPU_MACH_AMDGCN_GFX1100
@ EF_AMDGPU_MACH_AMDGCN_GFX1033
@ EF_AMDGPU_MACH_AMDGCN_GFX801
@ EF_AMDGPU_MACH_AMDGCN_GFX705
@ EF_AMDGPU_MACH_AMDGCN_GFX1010
@ EF_AMDGPU_MACH_R600_RV670
@ EF_AMDGPU_MACH_AMDGCN_GFX701
@ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1012
@ EF_AMDGPU_MACH_AMDGCN_GFX1151
@ EF_AMDGPU_MACH_AMDGCN_GFX1030
@ EF_AMDGPU_MACH_R600_CEDAR
@ EF_AMDGPU_MACH_AMDGCN_GFX1200
@ EF_AMDGPU_MACH_AMDGCN_GFX700
@ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX803
@ EF_AMDGPU_MACH_AMDGCN_GFX802
@ EF_AMDGPU_MACH_AMDGCN_GFX90C
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX900
@ EF_AMDGPU_MACH_AMDGCN_GFX909
@ EF_AMDGPU_MACH_AMDGCN_GFX906
@ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1103
@ EF_AMDGPU_MACH_R600_CAICOS
@ EF_AMDGPU_MACH_AMDGCN_GFX90A
@ EF_AMDGPU_MACH_AMDGCN_GFX1034
@ EF_AMDGPU_MACH_AMDGCN_GFX1013
@ EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX904
@ EF_AMDGPU_MACH_R600_RS880
@ EF_AMDGPU_MACH_AMDGCN_GFX805
@ EF_AMDGPU_MACH_AMDGCN_GFX1201
@ EF_AMDGPU_MACH_AMDGCN_GFX1150
@ EF_AMDGPU_MACH_R600_SUMO
@ EF_AMDGPU_MACH_R600_BARTS
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX702
initializer< Ty > init(const Ty &Val)
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
This is an optimization pass for GlobalISel generic memory operations.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
uint32_t group_segment_fixed_size
uint32_t compute_pgm_rsrc1
uint32_t private_segment_fixed_size
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3
int64_t kernel_code_entry_byte_offset