82 std::unique_ptr<MCStreamer> &&Streamer) {
94 std::unique_ptr<MCStreamer> Streamer)
100 return "AMDGPU Assembly Printer";
117void AMDGPUAsmPrinter::initTargetStreamer(
Module &M) {
123 initializeTargetID(M);
144 initTargetStreamer(M);
152 HSAMetadataStream->end();
167 STM.getCPU() +
" is only available on code object version 6 or better",
174 initializeTargetID(*
F.getParent());
176 const auto &FunctionTargetID = STM.getTargetID();
179 if (FunctionTargetID.isXnackSupported() &&
180 FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
181 FunctionTargetID.getXnackSetting() !=
getTargetStreamer()->getTargetID()->getXnackSetting()) {
183 "' function does not match module xnack setting");
188 if (FunctionTargetID.isSramEccSupported() &&
189 FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
192 "' function does not match module sramecc setting");
199 if (STM.isMesaKernel(
F) &&
203 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
208 if (STM.isAmdHsaOS())
209 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
231 Streamer.pushSection();
232 Streamer.switchSection(&ReadOnlySection);
236 Streamer.emitValueToAlignment(
Align(64), 0, 1, 0);
244 STM, KernelName, getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo),
254 Streamer.popSection();
262 OS <<
"implicit-def: "
266 OS <<
" : SGPR spill to VGPR lane";
286 if (DumpCodeInstEmitter) {
313 ": unsupported initializer for address space");
327 "' is already defined");
336 TS->emitAMDGPULDS(GVSym,
Size, Alignment);
347 switch (CodeObjectVersion) {
349 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
352 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
355 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
365void AMDGPUAsmPrinter::validateMCResourceInfo(
Function &
F) {
375 if (
Value->evaluateAsAbsolute(Val)) {
382 const uint64_t MaxScratchPerWorkitem =
389 ScratchSize > MaxScratchPerWorkitem) {
392 F.getContext().diagnose(DiagStackSize);
405 NumSgpr > MaxAddressableNumSGPRs) {
407 NumSgpr, MaxAddressableNumSGPRs,
409 F.getContext().diagnose(Diag);
418 uint64_t VCCUsed, FlatUsed, NumSgpr;
429 &STM, VCCUsed, FlatUsed,
434 if (NumSgpr > MaxAddressableNumSGPRs) {
438 F.getContext().diagnose(Diag);
450 getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
459 uint64_t NumVGPRsForWavesPerEU = std::max(
461 uint64_t NumSGPRsForWavesPerEU = std::max(
471 F,
"amdgpu-waves-per-eu", {0, 0},
true);
473 if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
475 F,
F.getSubprogram(),
476 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
478 F.getName() +
"': desired occupancy was " +
Twine(MinWEU) +
479 ", final occupancy is " +
Twine(Occupancy));
480 F.getContext().diagnose(Diag);
514 validateMCResourceInfo(
F);
532void AMDGPUAsmPrinter::emitCommonFunctionComments(
537 OutStreamer->emitRawComment(
" TotalNumSgprs: " + getMCExprStr(NumSGPR),
539 OutStreamer->emitRawComment(
" NumVgprs: " + getMCExprStr(NumVGPR),
false);
540 if (NumAGPR && TotalNumVGPR) {
541 OutStreamer->emitRawComment(
" NumAgprs: " + getMCExprStr(NumAGPR),
false);
542 OutStreamer->emitRawComment(
" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),
545 OutStreamer->emitRawComment(
" ScratchSize: " + getMCExprStr(ScratchSize),
551const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
559 KernelCodeProperties |=
560 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
563 KernelCodeProperties |=
564 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
567 KernelCodeProperties |=
568 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
571 KernelCodeProperties |=
572 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
575 KernelCodeProperties |=
576 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
579 KernelCodeProperties |=
580 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
583 KernelCodeProperties |=
584 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
587 KernelCodeProperties |=
588 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
595 const MCExpr *KernelCodePropExpr =
598 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
603 return KernelCodePropExpr;
620 Align MaxKernArgAlign;
628 int64_t PGRM_Rsrc3 = 1;
629 bool EvaluatableRsrc3 =
632 (void)EvaluatableRsrc3;
634 static_cast<uint64_t>(PGRM_Rsrc3) == 0);
641 return KernelDescriptor;
650 ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
676 getSIProgramInfo(CurrentProgramInfo,
MF);
681 EmitPALMetadata(
MF, CurrentProgramInfo);
683 emitPALFunctionMetadata(
MF);
685 EmitProgramInfoSI(
MF, CurrentProgramInfo);
688 DumpCodeInstEmitter =
nullptr;
732 OutStreamer->emitRawComment(
" Function info:",
false);
734 emitCommonFunctionComments(
749 getFunctionCodeSize(
MF), MFI);
753 OutStreamer->emitRawComment(
" Kernel info:",
false);
754 emitCommonFunctionComments(
758 CurrentProgramInfo.
ScratchSize, getFunctionCodeSize(
MF), MFI);
766 " bytes/workgroup (compile time only)",
false);
769 " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.
SGPRBlocks),
false);
772 " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.
VGPRBlocks),
false);
775 " NumSGPRsForWavesPerEU: " +
779 " NumVGPRsForWavesPerEU: " +
789 " AccumOffset: " + getMCExprStr(AdjustedAccum),
false);
793 " Occupancy: " + getMCExprStr(CurrentProgramInfo.
Occupancy),
false);
799 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
802 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
805 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
808 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
811 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
814 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
817 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
821 [[maybe_unused]] int64_t PGMRSrc3;
825 static_cast<uint64_t>(PGMRSrc3) == 0));
828 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
831 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
832 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
835 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
838 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
839 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
844 if (DumpCodeInstEmitter) {
850 std::string Comment =
"\n";
853 Comment +=
" ; " +
HexLines[i] +
"\n";
865void AMDGPUAsmPrinter::initializeTargetID(
const Module &M) {
879 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
880 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
885 if (TSTargetID->isXnackSupported())
886 if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
888 if (TSTargetID->isSramEccSupported())
889 if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
890 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
905 if (
MI.isDebugInstr())
908 CodeSize +=
TII->getInstSizeInBytes(
MI);
922 const MCExpr *MaximumTaken =
933void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
938 auto CreateExpr = [&Ctx](int64_t
Value) {
944 if (
Value->evaluateAsAbsolute(Val)) {
958 ProgInfo.
NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
959 ProgInfo.
NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
965 ProgInfo.
NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
966 ProgInfo.
ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
967 ProgInfo.
VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
968 ProgInfo.
FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
971 GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
987 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
988 NumSgpr > MaxAddressableNumSGPRs) {
995 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
1006 unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
1009 bool IsPixelShader =
1015 unsigned LastEna = 0;
1017 if (IsPixelShader) {
1028 assert((InputEna || InputAddr) &&
1029 "PSInputAddr and PSInputEnable should "
1030 "never both be 0 for AMDGPU_PS shaders");
1040 unsigned PSArgCount = 0;
1041 unsigned IntermediateVGPR = 0;
1042 for (
auto &Arg :
F.args()) {
1043 unsigned NumRegs = (
DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
1044 if (Arg.hasAttribute(Attribute::InReg)) {
1045 WaveDispatchNumSGPR += NumRegs;
1052 if (IsPixelShader && PSArgCount < 16) {
1053 if ((1 << PSArgCount) & InputAddr) {
1054 if (PSArgCount < LastEna)
1055 WaveDispatchNumVGPR += NumRegs;
1057 IntermediateVGPR += NumRegs;
1063 if (IntermediateVGPR) {
1064 WaveDispatchNumVGPR += IntermediateVGPR;
1065 IntermediateVGPR = 0;
1067 WaveDispatchNumVGPR += NumRegs;
1072 {ProgInfo.
NumSGPR, CreateExpr(WaveDispatchNumSGPR)}, Ctx);
1075 {ProgInfo.
NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
1079 }
else if (
isKernel(
F.getCallingConv()) &&
1106 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
1107 NumSgpr > MaxAddressableNumSGPRs) {
1112 NumSgpr, MaxAddressableNumSGPRs,
1115 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
1145 auto GetNumGPRBlocks = [&CreateExpr, &Ctx](
const MCExpr *NumGPR,
1147 const MCExpr *OneConst = CreateExpr(1ul);
1148 const MCExpr *GranuleConst = CreateExpr(Granule);
1150 const MCExpr *AlignToGPR =
1174 unsigned LDSAlignShift;
1175 if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) {
1178 }
else if (STM.getFeatureBits().test(
1179 FeatureAddressableLocalMemorySize65536)) {
1192 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1195 auto DivideCeil = [&Ctx](
const MCExpr *Numerator,
const MCExpr *Denominator) {
1202 unsigned ScratchAlignShift =
1210 CreateExpr(1ULL << ScratchAlignShift));
1218 unsigned TIDIGCompCnt = 0;
1262 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1263 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
1266 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1267 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
1274 const auto [MinWEU, MaxWEU] =
1277 if (TryGetMCExprValue(ProgInfo.
Occupancy, Occupancy) && Occupancy < MinWEU) {
1279 F,
F.getSubprogram(),
1280 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
1282 F.getName() +
"': desired occupancy was " +
Twine(MinWEU) +
1283 ", final occupancy is " +
Twine(Occupancy));
1284 F.getContext().diagnose(Diag);
1290 default: [[fallthrough]];
1316 auto EmitResolvedOrExpr = [
this](
const MCExpr *
Value,
unsigned Size) {
1318 if (
Value->evaluateAsAbsolute(Val))
1338 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.
ScratchBlocks,
1342 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.
ScratchBlocks,
1346 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.
ScratchBlocks,
1357 SetBits(CurrentProgramInfo.
VGPRBlocks, 0x3F, 0),
1358 SetBits(CurrentProgramInfo.
SGPRBlocks, 0x0F, 6),
1360 EmitResolvedOrExpr(GPRBlocks, 4);
1366 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.
ScratchBlocks,
1370 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.
ScratchBlocks,
1374 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.
ScratchBlocks,
1384 : CurrentProgramInfo.LDSBlocks;
1402 if (ST.hasIEEEMode())
1415 (
unsigned)(CurrentProgramInfo.
LdsSize *
1437 MD->setNumUsedAgprs(
CC, CurrentProgramInfo.
NumAccVGPR);
1441 if (MD->getPALMajorVersion() < 3) {
1446 const MCExpr *HasScratchBlocks =
1450 MD->setRsrc2(
CC,
maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
1453 MD->setHwStage(
CC,
".debug_mode", (
bool)CurrentProgramInfo.
DebugMode);
1469 : CurrentProgramInfo.LDSBlocks;
1470 if (MD->getPALMajorVersion() < 3) {
1479 const unsigned ExtraLdsDwGranularity =
1481 MD->setGraphicsRegisters(
1482 ".ps_extra_lds_size",
1483 (
unsigned)(ExtraLDSSize * ExtraLdsDwGranularity *
sizeof(
uint32_t)));
1487 ".persp_sample_ena",
".persp_center_ena",
1488 ".persp_centroid_ena",
".persp_pull_model_ena",
1489 ".linear_sample_ena",
".linear_center_ena",
1490 ".linear_centroid_ena",
".line_stipple_tex_ena",
1491 ".pos_x_float_ena",
".pos_y_float_ena",
1492 ".pos_z_float_ena",
".pos_w_float_ena",
1493 ".front_face_ena",
".ancillary_ena",
1494 ".sample_coverage_ena",
".pos_fixed_pt_ena"};
1498 MD->setGraphicsRegisters(
".spi_ps_input_ena",
Field,
1499 (
bool)((PSInputEna >>
Idx) & 1));
1500 MD->setGraphicsRegisters(
".spi_ps_input_addr",
Field,
1501 (
bool)((PSInputAddr >>
Idx) & 1));
1507 if (MD->getPALMajorVersion() < 3 && STM.
isWave32())
1511void AMDGPUAsmPrinter::emitPALFunctionMetadata(
const MachineFunction &MF) {
1515 MD->setFunctionScratchSize(FnName, MFI.
getStackSize());
1519 if (MD->getPALMajorVersion() < 3) {
1531 MD->setFunctionLdsSize(FnName, CurrentProgramInfo.
LDSSize);
1597 if (STM.isXNACKEnabled())
1600 Align MaxKernArgAlign;
1619 if (ExtraCode && ExtraCode[0]) {
1620 if (ExtraCode[1] != 0)
1623 switch (ExtraCode[0]) {
1639 int64_t Val = MO.
getImm();
1642 }
else if (isUInt<16>(Val)) {
1644 }
else if (isUInt<32>(Val)) {
1662void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1668 const char *
Name =
"kernel-resource-usage";
1669 const char *Indent =
" ";
1680 auto EmitResourceUsageRemark = [&](
StringRef RemarkName,
1685 std::string LabelStr = RemarkLabel.str() +
": ";
1686 if (RemarkName !=
"FunctionName")
1687 LabelStr = Indent + LabelStr;
1702 EmitResourceUsageRemark(
"FunctionName",
"Function Name",
1704 EmitResourceUsageRemark(
"NumSGPR",
"TotalSGPRs",
1705 getMCExprStr(CurrentProgramInfo.
NumSGPR));
1706 EmitResourceUsageRemark(
"NumVGPR",
"VGPRs",
1709 EmitResourceUsageRemark(
"NumAGPR",
"AGPRs",
1710 getMCExprStr(CurrentProgramInfo.
NumAccVGPR));
1712 EmitResourceUsageRemark(
"ScratchSize",
"ScratchSize [bytes/lane]",
1715 bool DynStackEvaluatable =
1718 DynStackEvaluatable && DynStack ?
"True" :
"False";
1719 EmitResourceUsageRemark(
"DynamicStack",
"Dynamic Stack", DynamicStackStr);
1720 EmitResourceUsageRemark(
"Occupancy",
"Occupancy [waves/SIMD]",
1721 getMCExprStr(CurrentProgramInfo.
Occupancy));
1722 EmitResourceUsageRemark(
"SGPRSpill",
"SGPRs Spill",
1724 EmitResourceUsageRemark(
"VGPRSpill",
"VGPRs Spill",
1726 if (isModuleEntryFunction)
1727 EmitResourceUsageRemark(
"BytesLDS",
"LDS Size [bytes/block]",
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST)
static unsigned getRsrcReg(CallingConv::ID CallConv)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
MC infrastructure to propagate the function level resource usage info.
Analyzes how many registers and other resources are used by functions.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
R600 Assembly printer class.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_0286E8_SPI_TMPRING_SIZE
#define FP_ROUND_MODE_DP(x)
#define C_00B84C_SCRATCH_EN
#define FP_ROUND_ROUND_TO_NEAREST
#define R_0286D0_SPI_PS_INPUT_ADDR
#define R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_0286CC_SPI_PS_INPUT_ENA
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define FP_DENORM_MODE_DP(x)
#define R_00B848_COMPUTE_PGM_RSRC1
#define FP_ROUND_MODE_SP(x)
#define FP_DENORM_MODE_SP(x)
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define S_00B02C_EXTRA_LDS_SIZE(x)
#define R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their file.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
bool IsTargetStreamerInitialized
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
bool doFinalization(Module &M) override
Shut down the asmprinter.
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file.
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but needed for further MCExprs).
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
uint32_t getLDSSize() const
bool isMemoryBound() const
bool needsWaveLimiter() const
bool isEntryFunction() const
bool isModuleEntryFunction() const
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitISAVersion()
void initializeTargetID(const MCSubtargetInfo &STI)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR)
virtual void EmitDirectiveAMDGCNTarget()
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled)
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
bool isXnackOnOrAny() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
This class is intended to be used as a driving class for all asm writers.
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
const MCAsmInfo * MAI
Target Asm Printer information.
MachineFunction * MF
The current machine function.
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between the predecessor and this block is a fall-through.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
MCSymbol * CurrentFnSym
The symbol for the current function.
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
bool isVerbose() const
Return true if assembly output should contain comments.
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string in and methods for querying it.
Diagnostic information for optimization failures.
Diagnostic information for stack size etc.
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasGFX90AInsts() const
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
const SIInstrInfo * getInstrInfo() const override
bool hasSGPRInitBug() const
bool isTgSplitEnabled() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool isCuModeEnabled() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
bool hasKernargSegmentPtr() const
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
VisibilityTypes getVisibility() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
const DiagnosticHandler * getDiagHandlerPtr() const
getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.
MCCodeEmitter * getEmitterPtr() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCObjectFileInfo * getObjectFileInfo() const
MCSectionELF * getELFSection(const Twine &Section, unsigned Type, unsigned Flags)
void reportError(SMLoc L, const Twine &Msg)
Base class for the full range of assembler expressions which are needed for parsing.
MCSection * getReadOnlySection() const
MCContext & getContext() const
void gatherResourceInfo(const MachineFunction &MF, const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &FRI, MCContext &OutContext)
AMDGPUResourceUsageAnalysis gathers resource usage on a per-function granularity.
MCSymbol * getMaxSGPRSymbol(MCContext &OutContext)
MCSymbol * getMaxAGPRSymbol(MCContext &OutContext)
const MCExpr * createTotalNumVGPRs(const MachineFunction &MF, MCContext &Ctx)
void finalize(MCContext &OutContext)
MCSymbol * getMaxVGPRSymbol(MCContext &OutContext)
const MCExpr * createTotalNumSGPRs(const MachineFunction &MF, bool hasXnack, MCContext &Ctx)
MCSymbol * getSymbol(StringRef FuncName, ResourceInfoKind RIK, MCContext &OutContext)
This represents a section on linux, lots of unix variants and some bare metal systems.
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
MCContext & getContext() const
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created and uniqued by the MCContext class.
const MCExpr * getVariableValue(bool SetUsed=true) const
getVariableValue - Get the value for variable symbols.
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
StringRef getName() const
getName - Get the symbol name.
bool isVariable() const
isVariable - Check if this is a variable symbol.
void redefineIfPossible()
Prepare this symbol to be redefined.
MCStreamer & getStreamer()
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
This class contains meta information specific to a module.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
A Module instance is used to store all the information related to an LLVM module.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
unsigned getNumSpilledVGPRs() const
unsigned getNumSpilledSGPRs() const
GCNUserSGPRUsageInfo & getUserSGPRInfo()
unsigned getMaxWavesPerEU() const
bool hasWorkGroupIDZ() const
bool hasWorkGroupIDY() const
SIModeRegisterDefaults getMode() const
bool hasWorkGroupInfo() const
bool hasWorkItemIDY() const
bool hasWorkGroupIDX() const
unsigned getNumKernargPreloadedSGPRs() const
unsigned getNumUserSGPRs() const
unsigned getPSInputAddr() const
bool hasWorkItemIDZ() const
unsigned getPSInputEnable() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs with the length computed at compile time.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::string str() const
str - Get the contents as an std::string.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const MCSubtargetInfo * getMCSubtargetInfo() const
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
Get the parsed operating system type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to a SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
IsaVersion getIsaVersion(StringRef GPU)
bool isCompute(CallingConv::ID cc)
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX90A(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isShader(CallingConv::ID cc)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
bool isGFX10Plus(const MCSubtargetInfo &STI)
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Target & getTheR600Target()
The target for R600 GPUs.
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Target & getTheGCNTarget()
The target for GCN GPUs.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Implement std::hash so that hash_code can be used in STL containers.
const SIFunctionResourceInfo & getResourceInfo() const
uint64_t kernarg_segment_byte_size
const MCExpr * workitem_private_segment_byte_size
const MCExpr * compute_pgm_resource2_registers
uint8_t kernarg_segment_alignment
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * wavefront_sgpr_count
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
const MCExpr * workitem_vgpr_count
const MCExpr * is_dynamic_callstack
uint32_t workgroup_group_segment_byte_size
const MCExpr * compute_pgm_resource1_registers
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
virtual bool isAnalysisRemarkEnabled(StringRef PassName) const
Return true if analysis remarks are enabled, override to provide different implementation.
Track resource usage for kernels / entry functions.
const MCExpr * ComputePGMRSrc3GFX90A
const MCExpr * NumArchVGPR
const MCExpr * getComputePGMRSrc2(MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
uint32_t TrapHandlerEnable
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
const MCExpr * getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST, MCContext &Ctx) const
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
void reset(const MachineFunction &MF)
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.