42 cl::desc(
"Force a specific generic_v<N> flag to be "
43 "added. For testing purposes only."),
48 if (!HSAMetadataDoc.
fromYAML(HSAMetadataString))
249 OS <<
"\t.amdgcn_target \"" <<
getTargetID()->toString() <<
"\"\n";
255 OS <<
"\t.amdhsa_code_object_version " << COV <<
'\n';
264 OS <<
"\t.amd_kernel_code_t\n";
265 Header.EmitKernelCodeT(OS,
getContext(), FoldAndPrint);
266 OS <<
"\t.end_amd_kernel_code_t\n";
274 OS <<
"\t.amdgpu_hsa_kernel " << SymbolName <<
'\n' ;
281 OS <<
"\t.amdgpu_lds " << Symbol->getName() <<
", " <<
Size <<
", "
282 << Alignment.
value() <<
'\n';
291#define PRINT_RES_INFO(ARG) \
293 ARG->print(OS, getContext().getAsmInfo()); \
295 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
296 Streamer.addBlankLine();
314#define PRINT_RES_INFO(ARG) \
316 ARG->print(OS, getContext().getAsmInfo()); \
318 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
319 Streamer.addBlankLine();
329 OS <<
"\t.amd_amdgpu_isa \"" <<
getTargetID()->toString() <<
"\"\n";
336 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
339 std::string HSAMetadataString;
341 HSAMetadataDoc.
toYAML(StrOS);
344 OS << StrOS.
str() <<
'\n';
350 const uint32_t Encoded_s_code_end = 0xbf9f0000;
351 const uint32_t Encoded_s_nop = 0xbf800000;
352 uint32_t Encoded_pad = Encoded_s_code_end;
362 Encoded_pad = Encoded_s_nop;
366 OS <<
"\t.p2alignl " << Log2CacheLineSize <<
", " << Encoded_pad <<
'\n';
367 OS <<
"\t.fill " << (FillSize / 4) <<
", 4, " << Encoded_pad <<
'\n';
375 const MCExpr *ReserveFlatScr) {
379 OS <<
"\t.amdhsa_kernel " << KernelName <<
'\n';
384 const MCExpr *ShiftedAndMaskedExpr =
396 OS <<
"\t\t.amdhsa_group_segment_fixed_size ";
400 OS <<
"\t\t.amdhsa_private_segment_fixed_size ";
404 OS <<
"\t\t.amdhsa_kernarg_size ";
410 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
411 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
412 ".amdhsa_user_sgpr_count");
415 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
416 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
417 ".amdhsa_user_sgpr_count");
423 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
424 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
425 ".amdhsa_user_sgpr_private_segment_buffer");
427 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
428 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
429 ".amdhsa_user_sgpr_dispatch_ptr");
431 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
432 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
433 ".amdhsa_user_sgpr_queue_ptr");
435 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
436 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
437 ".amdhsa_user_sgpr_kernarg_segment_ptr");
439 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
440 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
441 ".amdhsa_user_sgpr_dispatch_id");
444 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
445 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
446 ".amdhsa_user_sgpr_flat_scratch_init");
449 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
450 ".amdhsa_user_sgpr_kernarg_preload_length");
452 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
453 ".amdhsa_user_sgpr_kernarg_preload_offset");
457 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
458 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
459 ".amdhsa_user_sgpr_private_segment_size");
460 if (IVersion.
Major >= 10)
462 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
463 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
464 ".amdhsa_wavefront_size32");
467 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
468 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
469 ".amdhsa_uses_dynamic_stack");
471 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
472 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
474 ?
".amdhsa_enable_private_segment"
475 :
".amdhsa_system_sgpr_private_segment_wavefront_offset"));
477 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
478 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
479 ".amdhsa_system_sgpr_workgroup_id_x");
481 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
482 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
483 ".amdhsa_system_sgpr_workgroup_id_y");
485 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
486 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
487 ".amdhsa_system_sgpr_workgroup_id_z");
489 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
490 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
491 ".amdhsa_system_sgpr_workgroup_info");
493 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
494 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
495 ".amdhsa_system_vgpr_workitem_id");
498 OS <<
"\t\t.amdhsa_next_free_vgpr ";
499 EmitMCExpr(NextVGPR);
502 OS <<
"\t\t.amdhsa_next_free_sgpr ";
503 EmitMCExpr(NextSGPR);
510 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
511 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
getContext());
516 OS <<
"\t\t.amdhsa_accum_offset ";
524 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
525 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
526 ".amdhsa_named_barrier_count");
528 OS <<
"\t\t.amdhsa_reserve_vcc ";
529 EmitMCExpr(ReserveVCC);
533 OS <<
"\t\t.amdhsa_reserve_flat_scratch ";
534 EmitMCExpr(ReserveFlatScr);
544 OS <<
"\t\t.amdhsa_reserve_xnack_mask " <<
getTargetID()->isXnackOnOrAny() <<
'\n';
549 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
550 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
551 ".amdhsa_float_round_mode_32");
553 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
554 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
555 ".amdhsa_float_round_mode_16_64");
557 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
558 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
559 ".amdhsa_float_denorm_mode_32");
561 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
562 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
563 ".amdhsa_float_denorm_mode_16_64");
564 if (IVersion.
Major < 12) {
566 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
567 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
568 ".amdhsa_dx10_clamp");
570 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
571 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
572 ".amdhsa_ieee_mode");
574 if (IVersion.
Major >= 9) {
576 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
577 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
578 ".amdhsa_fp16_overflow");
582 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
583 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
".amdhsa_tg_split");
586 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
587 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
588 ".amdhsa_workgroup_processor_mode");
589 if (IVersion.
Major >= 10) {
591 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
592 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
593 ".amdhsa_memory_ordered");
595 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
596 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
597 ".amdhsa_forward_progress");
599 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
601 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
602 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
603 ".amdhsa_shared_vgpr_count");
605 if (IVersion.
Major == 11) {
607 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
608 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
609 ".amdhsa_inst_pref_size");
611 if (IVersion.
Major >= 12) {
613 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
614 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
615 ".amdhsa_inst_pref_size");
617 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
618 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
619 ".amdhsa_round_robin_scheduling");
624 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
625 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
626 ".amdhsa_exception_fp_ieee_invalid_op");
629 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
630 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
631 ".amdhsa_exception_fp_denorm_src");
635 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
636 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
637 ".amdhsa_exception_fp_ieee_div_zero");
640 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
641 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
642 ".amdhsa_exception_fp_ieee_overflow");
645 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
646 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
647 ".amdhsa_exception_fp_ieee_underflow");
650 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
651 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
652 ".amdhsa_exception_fp_ieee_inexact");
655 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
656 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
657 ".amdhsa_exception_int_div_zero");
659 OS <<
"\t.end_amdhsa_kernel\n";
679 W.setELFHeaderEFlags(getEFlags());
680 W.setOverrideABIVersion(
697void AMDGPUTargetELFStreamer::EmitNote(
701 auto &Context = S.getContext();
703 auto NameSZ = Name.size() + 1;
705 unsigned NoteFlags = 0;
715 S.emitValue(DescSZ, 4);
716 S.emitInt32(NoteType);
718 S.emitValueToAlignment(
Align(4), 0, 1, 0);
720 S.emitValueToAlignment(
Align(4), 0, 1, 0);
724unsigned AMDGPUTargetELFStreamer::getEFlags() {
729 return getEFlagsR600();
731 return getEFlagsAMDGCN();
735unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
741unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
742 assert(STI.getTargetTriple().isAMDGCN());
744 switch (STI.getTargetTriple().getOS()) {
749 return getEFlagsUnknownOS();
751 return getEFlagsAMDHSA();
753 return getEFlagsAMDPAL();
755 return getEFlagsMesa3D();
759unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
763 return getEFlagsV3();
766unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
770 return getEFlagsV6();
771 return getEFlagsV4();
774unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
777 return getEFlagsV3();
780unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
783 return getEFlagsV3();
786unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
787 unsigned EFlagsV3 = 0;
802unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
803 unsigned EFlagsV4 = 0;
842unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
843 unsigned Flags = getEFlagsV4();
879 " - no ELF flag can represent this version!");
904 auto *SymbolELF =
static_cast<MCSymbolELF *
>(Symbol);
907 if (!SymbolELF->isBindingSet())
910 if (SymbolELF->declareCommon(
Size, Alignment)) {
912 " redeclared as different type");
923 auto *DescBegin = Context.createTempSymbol();
924 auto *DescEnd = Context.createTempSymbol();
941 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
944 std::string HSAMetadataString;
950 auto *DescBegin = Context.createTempSymbol();
951 auto *DescEnd = Context.createTempSymbol();
966 const uint32_t Encoded_s_code_end = 0xbf9f0000;
967 const uint32_t Encoded_s_nop = 0xbf800000;
968 uint32_t Encoded_pad = Encoded_s_code_end;
978 Encoded_pad = Encoded_s_nop;
985 for (
unsigned I = 0;
I < FillSize;
I += 4)
995 const MCExpr *ReserveFlatScr) {
997 auto &Context = Streamer.getContext();
999 auto *KernelCodeSymbol =
1001 auto *KernelDescriptorSymbol =
static_cast<MCSymbolELF *
>(
1002 Context.getOrCreateSymbol(
Twine(KernelName) +
Twine(
".kd")));
1006 KernelDescriptorSymbol->
setBinding(KernelCodeSymbol->getBinding());
1007 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
1008 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
1011 KernelDescriptorSymbol->setSize(
1019 Streamer.emitLabel(KernelDescriptorSymbol);
1030 Streamer.emitInt8(0u);
1043 Streamer.emitInt8(0u);
1056 Streamer.emitInt8(0u);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
#define PRINT_RES_INFO(ARG)
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
verify safepoint Safepoint IR Verifier
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitDirectiveAMDGCNTarget() override
void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitDirectiveAMDGCNTarget() override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
MCELFStreamer & getStreamer()
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
AMDGPUTargetStreamer(MCStreamer &S)
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
unsigned CodeObjectVersion
This class is intended to be used as a base class for asm properties and features specific to the tar...
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
const MCAsmInfo * getAsmInfo() const
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
ELFObjectWriter & getWriter()
void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc()) override
Emit a label for Symbol into the current section.
Base class for the full range of assembler expressions which are needed for parsing.
void emitBytes(StringRef Data) override
Emit the bytes in Data into the output.
Streaming machine code generation interface.
virtual bool popSection()
Restore the current and previous section from the section stack.
MCContext & getContext() const
virtual void emitValueToAlignment(Align Alignment, int64_t Fill=0, uint8_t FillLen=1, unsigned MaxBytesToEmit=0)
Emit some number of copies of Value until the byte alignment ByteAlignment is reached.
void pushSection()
Save the current and previous section on the section stack.
void emitInt32(uint64_t Value)
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
void setBinding(unsigned Binding) const
void setType(unsigned Type) const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
StringRef - Represent a constant reference to a string, i.e.
ArchType getArch() const
Get the parsed architecture type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
LLVM_ABI void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
LLVM_ABI void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
LLVM_ABI bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static constexpr unsigned GFX12_5
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
LLVM_ABI StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_ABI GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
LLVM_ABI StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
LLVM_ABI GPUKind parseArchR600(StringRef CPU)
@ EF_AMDGPU_GENERIC_VERSION_MAX
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX703
@ EF_AMDGPU_MACH_AMDGCN_GFX1035
@ EF_AMDGPU_FEATURE_SRAMECC_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX1031
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
@ EF_AMDGPU_MACH_R600_CAYMAN
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX704
@ EF_AMDGPU_MACH_AMDGCN_GFX902
@ EF_AMDGPU_MACH_AMDGCN_GFX810
@ EF_AMDGPU_MACH_AMDGCN_GFX950
@ EF_AMDGPU_MACH_AMDGCN_GFX1036
@ EF_AMDGPU_MACH_AMDGCN_GFX1102
@ EF_AMDGPU_MACH_R600_RV730
@ EF_AMDGPU_MACH_R600_RV710
@ EF_AMDGPU_MACH_AMDGCN_GFX908
@ EF_AMDGPU_MACH_AMDGCN_GFX1011
@ EF_AMDGPU_MACH_R600_CYPRESS
@ EF_AMDGPU_MACH_AMDGCN_GFX1032
@ EF_AMDGPU_MACH_R600_R600
@ EF_AMDGPU_MACH_AMDGCN_GFX1250
@ EF_AMDGPU_MACH_R600_TURKS
@ EF_AMDGPU_MACH_R600_JUNIPER
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX601
@ EF_AMDGPU_MACH_AMDGCN_GFX942
@ EF_AMDGPU_MACH_AMDGCN_GFX1152
@ EF_AMDGPU_MACH_R600_R630
@ EF_AMDGPU_MACH_R600_REDWOOD
@ EF_AMDGPU_MACH_R600_RV770
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX600
@ EF_AMDGPU_FEATURE_XNACK_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX602
@ EF_AMDGPU_MACH_AMDGCN_GFX1101
@ EF_AMDGPU_MACH_AMDGCN_GFX1100
@ EF_AMDGPU_MACH_AMDGCN_GFX1310
@ EF_AMDGPU_MACH_AMDGCN_GFX1033
@ EF_AMDGPU_MACH_AMDGCN_GFX801
@ EF_AMDGPU_MACH_AMDGCN_GFX705
@ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1153
@ EF_AMDGPU_MACH_AMDGCN_GFX1170
@ EF_AMDGPU_MACH_AMDGCN_GFX1010
@ EF_AMDGPU_MACH_R600_RV670
@ EF_AMDGPU_MACH_AMDGCN_GFX701
@ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1012
@ EF_AMDGPU_MACH_AMDGCN_GFX1151
@ EF_AMDGPU_MACH_AMDGCN_GFX1030
@ EF_AMDGPU_MACH_R600_CEDAR
@ EF_AMDGPU_MACH_AMDGCN_GFX1200
@ EF_AMDGPU_MACH_AMDGCN_GFX700
@ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX803
@ EF_AMDGPU_MACH_AMDGCN_GFX802
@ EF_AMDGPU_MACH_AMDGCN_GFX90C
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX900
@ EF_AMDGPU_MACH_AMDGCN_GFX909
@ EF_AMDGPU_MACH_AMDGCN_GFX906
@ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1103
@ EF_AMDGPU_MACH_R600_CAICOS
@ EF_AMDGPU_MACH_AMDGCN_GFX90A
@ EF_AMDGPU_MACH_AMDGCN_GFX1034
@ EF_AMDGPU_MACH_AMDGCN_GFX1013
@ EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX904
@ EF_AMDGPU_MACH_AMDGCN_GFX1251
@ EF_AMDGPU_MACH_R600_RS880
@ EF_AMDGPU_MACH_AMDGCN_GFX805
@ EF_AMDGPU_MACH_AMDGCN_GFX1201
@ EF_AMDGPU_MACH_AMDGCN_GFX1150
@ EF_AMDGPU_MACH_R600_SUMO
@ EF_AMDGPU_MACH_R600_BARTS
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX702
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
uint32_t group_segment_fixed_size
uint32_t compute_pgm_rsrc1
uint32_t private_segment_fixed_size
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3
int64_t kernel_code_entry_byte_offset