87 std::unique_ptr<MCStreamer> &&Streamer) {
102 AMDGPUAsmPrinter *Asm;
105 AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *
A) : Asm(
A) {}
107 void beginFunction(
const MachineFunction *MF)
override {}
109 void endFunction(
const MachineFunction *MF)
override { Asm->endFunction(MF); }
111 void endModule()
override {}
116 std::unique_ptr<MCStreamer> Streamer)
122 return "AMDGPU Assembly Printer";
126 return &
TM.getMCSubtargetInfo();
139void AMDGPUAsmPrinter::initTargetStreamer(
Module &M) {
145 initializeTargetID(M);
147 const Triple &TT = M.getTargetTriple();
166 initTargetStreamer(M);
168 const Triple &TT = M.getTargetTriple();
175 HSAMetadataStream->end();
190 STM.getCPU() +
" is only available on code object version 6 or better");
196 initializeTargetID(*
F.getParent());
198 const auto &FunctionTargetID = STM.getTargetID();
201 if (FunctionTargetID.isXnackSupported() &&
202 FunctionTargetID.getXnackSetting() != AMDGPU::TargetIDSetting::Any &&
203 FunctionTargetID.getXnackSetting() !=
206 {},
"xnack setting of '" +
Twine(
MF->getName()) +
207 "' function does not match module xnack setting");
212 if (FunctionTargetID.isSramEccSupported() &&
213 FunctionTargetID.getSramEccSetting() != AMDGPU::TargetIDSetting::Any &&
214 FunctionTargetID.getSramEccSetting() !=
217 {},
"sramecc setting of '" +
Twine(
MF->getName()) +
218 "' function does not match module sramecc setting");
225 if (STM.isMesaKernel(
F) &&
229 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
234 if (STM.isAmdHsaOS())
235 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
261 getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo);
274 const MCExpr *InstPrefSize =
285 Streamer.pushSection();
286 Streamer.switchSection(&ReadOnlySection);
290 Streamer.emitValueToAlignment(
Align(64), 0, 1, 0);
296 STM, KernelName, KD, CurrentProgramInfo.NumVGPRsForWavesPerEU,
298 CurrentProgramInfo.NumSGPRsForWavesPerEU,
300 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
303 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
305 Streamer.popSection();
313 OS <<
"implicit-def: "
314 <<
printReg(RegNo,
MF->getSubtarget().getRegisterInfo());
317 OS <<
" : SGPR spill to VGPR lane";
337 if (DumpCodeInstEmitter) {
364 ": unsupported initializer for address space");
377 "LDS definitions should have been externalized when object "
378 "linking is enabled");
386 "' is already defined");
395 TS->emitAMDGPULDS(GVSym,
Size, Alignment);
407 switch (CodeObjectVersion) {
409 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
412 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
415 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
434 unsigned DynamicVGPRBlockSize,
447 auto CreateExpr = [&Ctx](
unsigned Value) {
457 {CreateExpr(MaxWaves), CreateExpr(Granule),
458 CreateExpr(TargetTotalNumVGPRs),
459 CreateExpr(InitOcc), CreateExpr(SGPRTotal),
460 CreateExpr(SGPRGranule),
461 CreateExpr(SGPRTrapReserve), SGPRArg, NumVGPRs},
465void AMDGPUAsmPrinter::validateMCResourceInfo(
Function &
F) {
470 const GCNSubtarget &STM =
TM.getSubtarget<GCNSubtarget>(
F);
473 auto TryGetMCExprValue = [](
const MCExpr *
Value, uint64_t &Res) ->
bool {
475 if (
Value->evaluateAsAbsolute(Val)) {
482 const uint64_t MaxScratchPerWorkitem =
485 RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize,
OutContext);
486 uint64_t ScratchSize;
489 ScratchSize > MaxScratchPerWorkitem) {
490 DiagnosticInfoStackSize DiagStackSize(
F, ScratchSize, MaxScratchPerWorkitem,
492 F.getContext().diagnose(DiagStackSize);
498 RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR,
OutContext);
500 !STM.hasSGPRInitBug()) {
505 NumSgpr > MaxAddressableNumSGPRs) {
506 F.getContext().diagnose(DiagnosticInfoResourceLimit(
507 F,
"addressable scalar registers", NumSgpr, MaxAddressableNumSGPRs,
514 RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC,
OutContext);
516 RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch,
OutContext);
517 uint64_t VCCUsed, FlatUsed, NumSgpr;
528 STM, VCCUsed, FlatUsed,
531 STM.hasSGPRInitBug()) {
533 if (NumSgpr > MaxAddressableNumSGPRs) {
534 F.getContext().diagnose(DiagnosticInfoResourceLimit(
535 F,
"scalar registers", NumSgpr, MaxAddressableNumSGPRs,
DS_Error,
542 RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR,
OutContext);
544 RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR,
OutContext);
545 uint64_t NumVgpr, NumAgpr;
547 MachineModuleInfo &
MMI =
549 MachineFunction *
MF =
MMI.getMachineFunction(
F);
553 const SIMachineFunctionInfo &MFI = *
MF->getInfo<SIMachineFunctionInfo>();
555 uint64_t TotalNumVgpr =
557 uint64_t NumVGPRsForWavesPerEU =
558 std::max({TotalNumVgpr, (uint64_t)1,
561 uint64_t NumSGPRsForWavesPerEU = std::max(
571 F,
"amdgpu-waves-per-eu", {0, 0},
true);
573 if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
574 DiagnosticInfoOptimizationFailure Diag(
575 F,
F.getSubprogram(),
576 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
578 F.getName() +
"': desired occupancy was " + Twine(MinWEU) +
579 ", final occupancy is " + Twine(Occupancy));
580 F.getContext().diagnose(Diag);
589 if (Ty->isVoidTy()) {
593 unsigned Bits =
DL.getTypeSizeInBits(Ty);
614 for (
Type *ParamTy : FTy->params())
622 const SIInstrInfo *
TII =
MF->getSubtarget<GCNSubtarget>().getInstrInfo();
623 const MachineOperand *
Callee =
624 TII->getNamedOperand(
MI, AMDGPU::OpName::callee);
625 if (!Callee || !
Callee->isGlobal())
627 DirectCallEdges.insert(
631void AMDGPUAsmPrinter::emitAMDGPUInfo(
Module &M) {
635 const NamedMDNode *LDSMD =
M.getNamedMetadata(
"amdgpu.lds.uses");
638 const NamedMDNode *BarMD =
M.getNamedMetadata(
"amdgpu.named_barrier.uses");
642 DenseMap<const Function *, std::string> AddrTakenTypeIds;
643 using IndirectCallInfo = std::pair<const Function *, std::string>;
646 for (
const Function &
F : M) {
649 if (!IsKernel &&
F.hasAddressTaken(
nullptr,
653 AddrTakenTypeIds[&
F] =
657 if (
F.isDeclaration())
660 StringSet<> SeenTypeIds;
661 for (
const BasicBlock &BB :
F) {
662 for (
const Instruction &
I : BB) {
664 if (!CB || !CB->isIndirectCall())
668 if (SeenTypeIds.
insert(TId).second)
669 IndirectCalls.
push_back({&
F, std::move(TId)});
674 if (FunctionInfos.empty() && DirectCallEdges.empty() && !HasLDSUses &&
675 !HasNamedBarriers && AddrTakenTypeIds.
empty() && IndirectCalls.
empty())
678 AMDGPU::InfoSectionData
Data;
679 Data.Funcs = std::move(FunctionInfos);
681 for (
auto &[
F, TypeId] : AddrTakenTypeIds) {
683 Data.TypeIds.push_back({Sym, TypeId});
686 for (
auto &[CallerSym, CalleeSym] : DirectCallEdges)
687 Data.Calls.push_back({CallerSym, CalleeSym});
688 DirectCallEdges.clear();
691 for (
const MDNode *
N : LDSMD->
operands()) {
698 if (HasNamedBarriers) {
699 for (
const MDNode *
N : BarMD->
operands()) {
702 for (
unsigned I = 1,
E =
N->getNumOperands();
I <
E; ++
I) {
709 for (
auto &[Caller, Enc] : IndirectCalls) {
711 Data.IndirectCalls.push_back({CallerSym, Enc});
718 const Triple &TT = M.getTargetTriple();
759 validateMCResourceInfo(
F);
778void AMDGPUAsmPrinter::emitCommonFunctionComments(
783 OutStreamer->emitRawComment(
" TotalNumSgprs: " + getMCExprStr(NumSGPR),
785 OutStreamer->emitRawComment(
" NumVgprs: " + getMCExprStr(NumVGPR),
false);
786 if (NumAGPR && TotalNumVGPR) {
787 OutStreamer->emitRawComment(
" NumAgprs: " + getMCExprStr(NumAGPR),
false);
788 OutStreamer->emitRawComment(
" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),
791 OutStreamer->emitRawComment(
" ScratchSize: " + getMCExprStr(ScratchSize),
797const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
799 const SIMachineFunctionInfo &MFI = *
MF.getInfo<SIMachineFunctionInfo>();
800 MCContext &Ctx =
MF.getContext();
801 uint16_t KernelCodeProperties = 0;
805 KernelCodeProperties |=
806 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
809 KernelCodeProperties |=
810 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
813 KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
816 KernelCodeProperties |=
817 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
820 KernelCodeProperties |=
821 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
824 KernelCodeProperties |=
825 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
828 KernelCodeProperties |=
829 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
831 if (
MF.getSubtarget<GCNSubtarget>().isWave32()) {
832 KernelCodeProperties |=
833 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
840 const MCExpr *KernelCodePropExpr =
843 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
848 return KernelCodePropExpr;
854 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
856 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
857 MCContext &Ctx =
MF.getContext();
859 MCKernelDescriptor KernelDescriptor;
865 Align MaxKernArgAlign;
873 int64_t PGM_Rsrc3 = 1;
874 bool EvaluatableRsrc3 =
875 CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGM_Rsrc3);
877 (void)EvaluatableRsrc3;
879 STM.hasGFX90AInsts() || STM.hasGFX1250Insts() || !EvaluatableRsrc3 ||
880 static_cast<uint64_t
>(PGM_Rsrc3) == 0);
887 return KernelDescriptor;
894 initTargetStreamer(*
MF.getFunction().getParent());
898 CurrentProgramInfo.reset(
MF);
924 FunctionInfos.push_back(
936 getSIProgramInfo(CurrentProgramInfo,
MF);
941 EmitPALMetadata(
MF, CurrentProgramInfo);
943 emitPALFunctionMetadata(
MF);
945 EmitProgramInfoSI(
MF, CurrentProgramInfo);
948 DumpCodeInstEmitter =
nullptr;
949 if (STM.dumpCode()) {
972 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_NumNamedBarrier,
974 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
977 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
979 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
981 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasRecursion,
983 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
997 OutStreamer->emitRawComment(
" Function info:",
false);
999 emitCommonFunctionComments(
1001 ->getVariableValue(),
1002 STM.hasMAIInsts() ? RI.getSymbol(
CurrentFnSym->getName(),
1004 ->getVariableValue()
1006 RI.createTotalNumVGPRs(
MF, Ctx),
1007 RI.createTotalNumSGPRs(
1011 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
1013 ->getVariableValue(),
1014 CurrentProgramInfo.getFunctionCodeSize(
MF), MFI);
1018 OutStreamer->emitRawComment(
" Kernel info:",
false);
1019 emitCommonFunctionComments(
1020 CurrentProgramInfo.NumArchVGPR,
1021 STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR :
nullptr,
1022 CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
1023 CurrentProgramInfo.ScratchSize,
1024 CurrentProgramInfo.getFunctionCodeSize(
MF), MFI);
1027 " FloatMode: " +
Twine(CurrentProgramInfo.FloatMode),
false);
1029 " IeeeMode: " +
Twine(CurrentProgramInfo.IEEEMode),
false);
1031 " LDSByteSize: " +
Twine(CurrentProgramInfo.LDSSize) +
1032 " bytes/workgroup (compile time only)",
1036 " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks),
false);
1039 " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks),
false);
1042 " NumSGPRsForWavesPerEU: " +
1043 getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU),
1046 " NumVGPRsForWavesPerEU: " +
1047 getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU),
1050 if (STM.hasGFX90AInsts()) {
1056 " AccumOffset: " + getMCExprStr(AdjustedAccum),
false);
1059 if (STM.hasGFX1250Insts())
1061 " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
1065 " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy),
false);
1071 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
1072 getMCExprStr(CurrentProgramInfo.ScratchEnable),
1074 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
1075 Twine(CurrentProgramInfo.UserSGPR),
1077 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
1078 Twine(CurrentProgramInfo.TrapHandlerEnable),
1080 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
1081 Twine(CurrentProgramInfo.TGIdXEnable),
1083 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
1084 Twine(CurrentProgramInfo.TGIdYEnable),
1086 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
1087 Twine(CurrentProgramInfo.TGIdZEnable),
1089 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
1090 Twine(CurrentProgramInfo.TIdIGCompCount),
1093 [[maybe_unused]] int64_t PGMRSrc3;
1095 STM.hasGFX90AInsts() || STM.hasGFX1250Insts() ||
1096 (CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
1097 static_cast<uint64_t>(PGMRSrc3) == 0));
1098 if (STM.hasGFX90AInsts()) {
1100 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
1102 CurrentProgramInfo.ComputePGMRSrc3,
1103 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
1104 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
1107 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
1109 CurrentProgramInfo.ComputePGMRSrc3,
1110 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
1111 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
1116 if (DumpCodeInstEmitter) {
1122 std::string Comment =
"\n";
1125 Comment +=
" ; " +
HexLines[i] +
"\n";
1150 const MCExpr *EncodedBlocks;
1153 NumVGPRs,
nullptr) &&
1154 NumVGPRs.isAbsolute()) {
1158 unsigned NumBlocks =
1163 {},
"DVGPR block count " +
Twine(NumBlocks) +
1164 " exceeds maximum of " +
1166 " for __dvgpr$ symbol for '" +
1170 unsigned EncodedNumBlocks = (NumBlocks - 1) << 3;
1178 {CurrentProgramInfo.NumVGPRsForWavesPerEU, One}, Ctx);
1181 BlockSizeConst, Ctx);
1192 OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
1199void AMDGPUAsmPrinter::initializeTargetID(
const Module &M) {
1213 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
1214 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
1217 const GCNSubtarget &STM =
TM.getSubtarget<GCNSubtarget>(
F);
1219 if (TSTargetID->isXnackSupported())
1220 if (TSTargetID->getXnackSetting() == AMDGPU::TargetIDSetting::Any)
1222 if (TSTargetID->isSramEccSupported())
1223 if (TSTargetID->getSramEccSetting() == AMDGPU::TargetIDSetting::Any)
1224 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
1235 const MCExpr *MaximumTaken =
1246void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
1248 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1249 MCContext &Ctx =
MF.getContext();
1251 auto CreateExpr = [&Ctx](int64_t
Value) {
1255 auto TryGetMCExprValue = [](
const MCExpr *
Value, uint64_t &Res) ->
bool {
1257 if (
Value->evaluateAsAbsolute(Val)) {
1264 auto GetSymRefExpr =
1271 ProgInfo.
NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
1272 ProgInfo.
NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
1278 ProgInfo.
NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
1279 ProgInfo.
ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
1280 ProgInfo.
VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
1281 ProgInfo.
FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
1284 GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
1288 GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
1291 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1302 !STM.hasSGPRInitBug()) {
1305 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
1306 NumSgpr > MaxAddressableNumSGPRs) {
1308 LLVMContext &Ctx =
MF.getFunction().getContext();
1309 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1310 MF.getFunction(),
"addressable scalar registers", NumSgpr,
1312 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
1326 if (WaveDispatchNumSGPR) {
1334 if (WaveDispatchNumVGPR) {
1336 {ProgInfo.
NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
1356 STM.hasSGPRInitBug()) {
1359 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
1360 NumSgpr > MaxAddressableNumSGPRs) {
1363 LLVMContext &Ctx =
MF.getFunction().getContext();
1364 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1365 MF.getFunction(),
"scalar registers", NumSgpr, MaxAddressableNumSGPRs,
1367 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
1372 if (STM.hasSGPRInitBug()) {
1380 LLVMContext &Ctx =
MF.getFunction().getContext();
1381 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1387 LLVMContext &Ctx =
MF.getFunction().getContext();
1388 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1394 auto GetNumGPRBlocks = [&CreateExpr, &Ctx](
const MCExpr *NumGPR,
1396 const MCExpr *OneConst = CreateExpr(1ul);
1397 const MCExpr *GranuleConst = CreateExpr(Granule);
1399 const MCExpr *AlignToGPR =
1401 const MCExpr *DivGPR =
1416 const SIModeRegisterDefaults
Mode = MFI->
getMode();
1427 unsigned LDSAlignShift = 8;
1448 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1451 auto DivideCeil = [&Ctx](
const MCExpr *Numerator,
const MCExpr *Denominator) {
1452 const MCExpr *Ceil =
1458 unsigned ScratchAlignShift =
1466 CreateExpr(1ULL << ScratchAlignShift));
1474 ProgInfo.
FwdProgress = !
F.hasFnAttribute(
"amdgpu-no-fwd-progress");
1478 unsigned TIDIGCompCnt = 0;
1507 if (STM.hasGFX90AInsts()) {
1510 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1511 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, Ctx);
1514 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1515 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, Ctx);
1518 if (STM.hasGFX1250Insts())
1521 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
1522 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT, Ctx);
1529 const auto [MinWEU, MaxWEU] =
1532 if (TryGetMCExprValue(ProgInfo.
Occupancy, Occupancy) && Occupancy < MinWEU) {
1533 DiagnosticInfoOptimizationFailure Diag(
1534 F,
F.getSubprogram(),
1535 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
1537 F.getName() +
"': desired occupancy was " + Twine(MinWEU) +
1538 ", final occupancy is " + Twine(Occupancy));
1539 F.getContext().diagnose(Diag);
1564void AMDGPUAsmPrinter::EmitProgramInfoSI(
1566 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1567 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1568 unsigned RsrcReg =
getRsrcReg(
MF.getFunction().getCallingConv());
1569 MCContext &Ctx =
MF.getContext();
1572 auto SetBits = [&Ctx](
const MCExpr *
Value, uint32_t
Mask, uint32_t Shift) {
1579 auto EmitResolvedOrExpr = [
this](
const MCExpr *
Value,
unsigned Size) {
1581 if (
Value->evaluateAsAbsolute(Val))
1590 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),
1594 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx),
1602 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1606 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1610 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1621 SetBits(CurrentProgramInfo.VGPRBlocks, 0x3F, 0),
1622 SetBits(CurrentProgramInfo.SGPRBlocks, 0x0F, 6),
1624 EmitResolvedOrExpr(GPRBlocks, 4);
1630 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1634 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1638 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1647 ?
divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1648 : CurrentProgramInfo.LDSBlocks;
1666 unsigned DynamicVGPRBlockSize) {
1667 if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1679 if (DynamicVGPRBlockSize != 0)
1694void AMDGPUAsmPrinter::EmitPALMetadata(
1696 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1697 auto CC =
MF.getFunction().getCallingConv();
1699 auto &Ctx =
MF.getContext();
1701 MD->setEntryPoint(CC,
MF.getFunction().getName());
1702 MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
1706 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1709 MD->setHwStage(CC,
".dynamic_vgpr_saved_count",
1713 if (STM.hasMAIInsts()) {
1714 MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
1717 MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
1718 if (MD->getPALMajorVersion() < 3) {
1719 MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
1721 MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx), Ctx);
1723 const MCExpr *HasScratchBlocks =
1727 MD->setRsrc2(CC,
maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
1730 MD->setHwStage(CC,
".debug_mode", (
bool)CurrentProgramInfo.DebugMode);
1732 CurrentProgramInfo.ScratchEnable);
1746 ?
divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1747 : CurrentProgramInfo.LDSBlocks;
1748 if (MD->getPALMajorVersion() < 3) {
1757 const unsigned ExtraLdsDwGranularity =
1759 MD->setGraphicsRegisters(
1760 ".ps_extra_lds_size",
1761 (
unsigned)(ExtraLDSSize * ExtraLdsDwGranularity *
sizeof(uint32_t)));
1764 static StringLiteral
const PsInputFields[] = {
1765 ".persp_sample_ena",
".persp_center_ena",
1766 ".persp_centroid_ena",
".persp_pull_model_ena",
1767 ".linear_sample_ena",
".linear_center_ena",
1768 ".linear_centroid_ena",
".line_stipple_tex_ena",
1769 ".pos_x_float_ena",
".pos_y_float_ena",
1770 ".pos_z_float_ena",
".pos_w_float_ena",
1771 ".front_face_ena",
".ancillary_ena",
1772 ".sample_coverage_ena",
".pos_fixed_pt_ena"};
1776 MD->setGraphicsRegisters(
".spi_ps_input_ena",
Field,
1777 (
bool)((PSInputEna >> Idx) & 1));
1778 MD->setGraphicsRegisters(
".spi_ps_input_addr",
Field,
1779 (
bool)((PSInputAddr >> Idx) & 1));
1785 if (MD->getPALMajorVersion() < 3 && STM.
isWave32())
1786 MD->setWave32(
MF.getFunction().getCallingConv());
1789void AMDGPUAsmPrinter::emitPALFunctionMetadata(
const MachineFunction &MF) {
1791 const MachineFrameInfo &MFI =
MF.getFrameInfo();
1792 StringRef FnName =
MF.getFunction().getName();
1793 MD->setFunctionScratchSize(FnName, MFI.
getStackSize());
1794 const GCNSubtarget &
ST =
MF.getSubtarget<GCNSubtarget>();
1795 MCContext &Ctx =
MF.getContext();
1797 if (MD->getPALMajorVersion() < 3) {
1803 CurrentProgramInfo.getComputePGMRSrc2(ST, Ctx), Ctx);
1807 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());
1811 MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
1812 MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1813 MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1830void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
1837 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1838 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1839 MCContext &Ctx =
MF.getContext();
1844 CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
1846 CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx);
1877 if (STM.isXNACKEnabled())
1880 Align MaxKernArgAlign;
1899 if (ExtraCode && ExtraCode[0]) {
1900 if (ExtraCode[1] != 0)
1903 switch (ExtraCode[0]) {
1915 *
MF->getSubtarget().getRegisterInfo());
1919 int64_t Val = MO.
getImm();
1942void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1948 const char *Name =
"kernel-resource-usage";
1949 const char *Indent =
" ";
1953 if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
1960 auto EmitResourceUsageRemark = [&](
StringRef RemarkName,
1965 std::string LabelStr = RemarkLabel.str() +
": ";
1966 if (RemarkName !=
"FunctionName")
1967 LabelStr = Indent + LabelStr;
1982 EmitResourceUsageRemark(
"FunctionName",
"Function Name",
1983 MF.getFunction().getName());
1984 EmitResourceUsageRemark(
"NumSGPR",
"TotalSGPRs",
1985 getMCExprStr(CurrentProgramInfo.NumSGPR));
1986 EmitResourceUsageRemark(
"NumVGPR",
"VGPRs",
1987 getMCExprStr(CurrentProgramInfo.NumArchVGPR));
1989 EmitResourceUsageRemark(
"NumAGPR",
"AGPRs",
1990 getMCExprStr(CurrentProgramInfo.NumAccVGPR));
1992 EmitResourceUsageRemark(
"ScratchSize",
"ScratchSize [bytes/lane]",
1993 getMCExprStr(CurrentProgramInfo.ScratchSize));
1995 bool DynStackEvaluatable =
1996 CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);
1997 StringRef DynamicStackStr =
1998 DynStackEvaluatable && DynStack ?
"True" :
"False";
1999 EmitResourceUsageRemark(
"DynamicStack",
"Dynamic Stack", DynamicStackStr);
2000 EmitResourceUsageRemark(
"Occupancy",
"Occupancy [waves/SIMD]",
2001 getMCExprStr(CurrentProgramInfo.Occupancy));
2002 EmitResourceUsageRemark(
"SGPRSpill",
"SGPRs Spill",
2003 CurrentProgramInfo.SGPRSpill);
2004 EmitResourceUsageRemark(
"VGPRSpill",
"VGPRs Spill",
2005 CurrentProgramInfo.VGPRSpill);
2006 if (isModuleEntryFunction)
2007 EmitResourceUsageRemark(
"BytesLDS",
"LDS Size [bytes/block]",
2008 CurrentProgramInfo.LDSSize);
2014 "AMDGPU Assembly Printer",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize)
const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
static unsigned getRsrcReg(CallingConv::ID CallConv)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static const MCExpr * setBits(const MCExpr *Dst, const MCExpr *Value, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Set bits in a kernel descriptor MCExpr field: return ((Dst & ~Mask) | (Value << Shift))
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
static std::string computeTypeId(const FunctionType *FTy, const DataLayout &DL)
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL, bool IsReturnType)
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
MC infrastructure to propagate the function level resource usage info.
Analyzes how many registers and other resources are used by functions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
R600 Assembly printer class.
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_0286E8_SPI_TMPRING_SIZE
#define FP_ROUND_MODE_DP(x)
#define C_00B84C_SCRATCH_EN
#define FP_ROUND_ROUND_TO_NEAREST
#define R_0286D0_SPI_PS_INPUT_ADDR
#define R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_0286CC_SPI_PS_INPUT_ENA
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define FP_DENORM_MODE_DP(x)
#define R_00B848_COMPUTE_PGM_RSRC1
#define FP_ROUND_MODE_SP(x)
#define FP_DENORM_MODE_SP(x)
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define S_00B02C_EXTRA_LDS_SIZE(x)
#define R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
StringSet - A set-like wrapper for the StringMap.
static const int BlockSize
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
void endFunction(const MachineFunction *MF)
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
bool IsTargetStreamerInitialized
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
AMDGPU target specific MCExpr operations.
static const AMDGPUMCExpr * createInstPrefSize(const MCExpr *CodeSizeBytes, MCContext &Ctx)
Create an expression for instruction prefetch size computation: min(divideCeil(CodeSizeBytes,...
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
bool isMemoryBound() const
bool isModuleEntryFunction() const
bool needsWaveLimiter() const
uint32_t getLDSSize() const
bool isEntryFunction() const
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
static bool EnableObjectLinking
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
virtual void emitAMDGPUInfo(const AMDGPU::InfoSectionData &Data)
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitISAVersion()
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
const std::optional< AMDGPU::TargetID > & getTargetID() const
void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString)
virtual void EmitDirectiveAMDGCNTarget()
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier)
bool isXnackOnOrAny() const
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Collects and handles AsmPrinter objects required to build debug or EH information.
This class is intended to be used as a driving class for all asm writers.
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
MachineFunction * MF
The current machine function.
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
MCSymbol * CurrentFnSym
The symbol for the current function.
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
const MCAsmInfo & MAI
Target Asm Printer information.
bool isVerbose() const
Return true if assembly output should contain comments.
MCSymbol * getFunctionEnd() const
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void addAsmPrinterHandler(std::unique_ptr< AsmPrinterHandler > Handler)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string, and methods for querying it.
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool isTgSplitEnabled() const
bool hasInstPrefSize() const
bool isCuModeEnabled() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
const AMDGPU::TargetID & getTargetID() const
void getInstPrefSizeArgs(uint32_t &Mask, uint32_t &Shift, uint32_t &Width, uint32_t &CacheLineSize) const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
bool hasKernargSegmentPtr() const
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
VisibilityTypes getVisibility() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
unsigned getAddressSpace() const
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MCCodeEmitter * getEmitterPtr() const
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCObjectFileInfo * getObjectFileInfo() const
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
LLVM_ABI bool evaluateAsRelocatable(MCValue &Res, const MCAssembler *Asm) const
Try to evaluate the expression to a relocatable value, i.e.
MCSection * getReadOnlySection() const
MCSection * getTextSection() const
MCContext & getContext() const
This represents a section on linux, lots of unix variants and some bare metal systems.
Instances of this class represent a uniqued identifier for a section in the current translation unit.
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
bool hasInstructions() const
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
StringRef getName() const
getName - Get the symbol name.
bool isVariable() const
isVariable - Check if this is a variable symbol.
void redefineIfPossible()
Prepare this symbol to be redefined.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCStreamer & getStreamer()
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI unsigned getNumOperands() const
iterator_range< op_iterator > operands()
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getNumWaveDispatchVGPRs() const
unsigned getNumSpilledVGPRs() const
unsigned getNumWaveDispatchSGPRs() const
unsigned getNumSpilledSGPRs() const
GCNUserSGPRUsageInfo & getUserSGPRInfo()
unsigned getDynamicVGPRBlockSize() const
unsigned getMaxWavesPerEU() const
bool hasWorkGroupIDZ() const
bool hasWorkGroupIDY() const
SIModeRegisterDefaults getMode() const
bool hasWorkGroupInfo() const
bool hasWorkItemIDY() const
bool hasWorkGroupIDX() const
unsigned getNumUserSGPRs() const
unsigned getScratchReservedForDynamicVGPRs() const
bool isDynamicVGPREnabled() const
unsigned getPSInputAddr() const
bool hasWorkItemIDZ() const
unsigned getPSInputEnable() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
std::pair< typename Base::iterator, bool > insert(StringRef key)
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
bool isSGPROccupancyLimited(const MCSubtargetInfo &STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
static constexpr unsigned MaxDynamicVGPRBlocks
Maximum number of VGPR blocks that can be allocated in dynamic VGPR mode.
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
bool isGFX10Plus(const MCSubtargetInfo &STI)
AMDGPU::TargetID TargetID
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Target & getTheR600Target()
The target for R600 GPUs.
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
@ Success
The lock was released successfully.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Target & getTheGCNTarget()
The target for GCN GPUs.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Implement std::hash so that hash_code can be used in STL containers.
bool HasDynamicallySizedStack
uint64_t PrivateSegmentSize
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo FunctionResourceInfo
uint64_t kernarg_segment_byte_size
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
const MCExpr * workitem_private_segment_byte_size
const MCExpr * compute_pgm_resource2_registers
uint8_t kernarg_segment_alignment
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * wavefront_sgpr_count
const MCExpr * workitem_vgpr_count
const MCExpr * is_dynamic_callstack
uint32_t workgroup_group_segment_byte_size
const MCExpr * compute_pgm_resource1_registers
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
Track resource usage for kernels / entry functions.
const MCExpr * NumArchVGPR
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * ComputePGMRSrc3
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
uint32_t TrapHandlerEnable
const MCExpr * NamedBarCnt
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
const MCExpr * getComputePGMRSrc2(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.