LLVM 23.0.0git
AMDGPUPALMetadata.cpp
Go to the documentation of this file.
1//===-- AMDGPUPALMetadata.cpp - Accumulate and print AMDGPU PAL metadata -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10///
11/// This class has methods called by AMDGPUAsmPrinter to accumulate and print
12/// the PAL metadata.
13//
14//===----------------------------------------------------------------------===//
15//
16
17#include "AMDGPUPALMetadata.h"
18#include "AMDGPUPTNote.h"
19#include "SIDefines.h"
20#include "llvm/ADT/Enum.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/Module.h"
24#include "llvm/MC/MCExpr.h"
28
29using namespace llvm;
30using namespace llvm::AMDGPU;
31
32// Return the PAL metadata hardware shader stage name.
33static const char *getStageName(CallingConv::ID CC) {
34 switch (CC) {
36 return ".ps";
38 return ".vs";
40 return ".gs";
42 return ".es";
44 return ".hs";
46 return ".ls";
49 llvm_unreachable("Callable shader has no hardware stage");
50 default:
51 return ".cs";
52 }
53}
54
55// Read the PAL metadata from IR metadata, where it was put by the frontend.
57 auto *NamedMD = M.getNamedMetadata("amdgpu.pal.metadata.msgpack");
58 if (NamedMD && NamedMD->getNumOperands()) {
59 // This is the new msgpack format for metadata. It is a NamedMD containing
60 // an MDTuple containing an MDString containing the msgpack data.
61 BlobType = ELF::NT_AMDGPU_METADATA;
62 auto *MDN = dyn_cast<MDTuple>(NamedMD->getOperand(0));
63 if (MDN && MDN->getNumOperands()) {
64 if (auto *MDS = dyn_cast<MDString>(MDN->getOperand(0)))
65 setFromMsgPackBlob(MDS->getString());
66 }
67 return;
68 }
69 BlobType = ELF::NT_AMD_PAL_METADATA;
70 NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
71 if (!NamedMD || !NamedMD->getNumOperands()) {
72 // Emit msgpack metadata by default
73 BlobType = ELF::NT_AMDGPU_METADATA;
74 return;
75 }
76 // This is the old reg=value pair format for metadata. It is a NamedMD
77 // containing an MDTuple containing a number of MDNodes each of which is an
78 // integer value, and each two integer values forms a key=value pair that we
79 // store as Registers[key]=value in the map.
80 auto *Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
81 if (!Tuple)
82 return;
83 for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
84 auto *Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
85 auto *Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
86 if (!Key || !Val)
87 continue;
88 setRegister(Key->getZExtValue(), Val->getZExtValue());
89 }
90}
91
92// Set PAL metadata from a binary blob from the applicable .note record.
93// Returns false if bad format. Blob must remain valid for the lifetime of the
94// Metadata.
96 BlobType = Type;
98 return setFromLegacyBlob(Blob);
99 return setFromMsgPackBlob(Blob);
100}
101
102// Set PAL metadata from legacy (array of key=value pairs) blob.
103bool AMDGPUPALMetadata::setFromLegacyBlob(StringRef Blob) {
104 const auto *Data = reinterpret_cast<const uint32_t *>(Blob.data());
105 for (unsigned I = 0; I != Blob.size() / sizeof(uint32_t) / 2; ++I)
106 setRegister(Data[I * 2], Data[I * 2 + 1]);
107 return true;
108}
109
110// Set PAL metadata from msgpack blob.
111bool AMDGPUPALMetadata::setFromMsgPackBlob(StringRef Blob) {
112 return MsgPackDoc.readFromBlob(Blob, /*Multi=*/false);
113}
114
115// Given the calling convention, calculate the register number for rsrc1. In
116// principle the register number could change in future hardware, but we know
117// it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
118// we can use fixed values.
137
138// Calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
139// with a constant offset to access any non-register shader-specific PAL
140// metadata key.
159
160// Set the rsrc1 register in the metadata for a particular shader stage.
161// In fact this ORs the value into any previous setting of the register.
163 setRegister(getRsrc1Reg(CC), Val);
164}
165
167 MCContext &Ctx) {
168 setRegister(getRsrc1Reg(CC), Val, Ctx);
169}
170
171// Set the rsrc2 register in the metadata for a particular shader stage.
172// In fact this ORs the value into any previous setting of the register.
174 setRegister(getRsrc1Reg(CC) + 1, Val);
175}
176
178 MCContext &Ctx) {
179 setRegister(getRsrc1Reg(CC) + 1, Val, Ctx);
180}
181
182// Set the SPI_PS_INPUT_ENA register in the metadata.
183// In fact this ORs the value into any previous setting of the register.
187
188// Set the SPI_PS_INPUT_ADDR register in the metadata.
189// In fact this ORs the value into any previous setting of the register.
193
194// Get a register from the metadata, or 0 if not currently set.
195unsigned AMDGPUPALMetadata::getRegister(unsigned Reg) {
196 auto Regs = getRegisters();
197 auto It = Regs.find(MsgPackDoc.getNode(Reg));
198 if (It == Regs.end())
199 return 0;
200 auto N = It->second;
201 if (N.getKind() != msgpack::Type::UInt)
202 return 0;
203 return N.getUInt();
204}
205
206// Set a register in the metadata.
207// In fact this ORs the value into any previous setting of the register.
208void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) {
209 if (!isLegacy()) {
210 // In the new MsgPack format, ignore register numbered >= 0x10000000. It
211 // is a PAL ABI pseudo-register in the old non-MsgPack format.
212 if (Reg >= 0x10000000)
213 return;
214 }
215 auto &N = getRegisters()[MsgPackDoc.getNode(Reg)];
216 if (N.getKind() == msgpack::Type::UInt)
217 Val |= N.getUInt();
218 N = N.getDocument()->getNode(Val);
219}
220
221// Set a register in the metadata.
222// In fact this ORs the value into any previous setting of the register.
223void AMDGPUPALMetadata::setRegister(unsigned Reg, const MCExpr *Val,
224 MCContext &Ctx) {
225 if (!isLegacy()) {
226 // In the new MsgPack format, ignore register numbered >= 0x10000000. It
227 // is a PAL ABI pseudo-register in the old non-MsgPack format.
228 if (Reg >= 0x10000000)
229 return;
230 }
231 auto &N = getRegisters()[MsgPackDoc.getNode(Reg)];
232 auto [ExprIt, Inserted] = REM.try_emplace(Reg);
233
234 if (!Inserted) {
235 Val = MCBinaryExpr::createOr(Val, ExprIt->getSecond(), Ctx);
236 // This conditional may be redundant most of the time, but the alternate
237 // setRegister(unsigned, unsigned) could've been called while the
238 // conditional returns true (i.e., Reg exists in REM).
239 if (N.getKind() == msgpack::Type::UInt) {
240 const MCExpr *NExpr = MCConstantExpr::create(N.getUInt(), Ctx);
241 Val = MCBinaryExpr::createOr(Val, NExpr, Ctx);
242 }
243 } else if (N.getKind() == msgpack::Type::UInt) {
244 const MCExpr *NExpr = MCConstantExpr::create(N.getUInt(), Ctx);
245 Val = MCBinaryExpr::createOr(Val, NExpr, Ctx);
246 } else {
247 // Default to uint64_t 0 so additional calls to setRegister will allow
248 // propagate ORs.
249 N = (uint64_t)0;
250 }
251 ExprIt->second = Val;
252 DelayedExprs.assignDocNode(N, msgpack::Type::UInt, Val);
253}
254
255// Set the entry point name for one shader.
257 if (isLegacy())
258 return;
259 // Msgpack format.
260 // Entry point is updated to .entry_point_symbol and is set to the function
261 // name
262 getHwStage(CC)[".entry_point_symbol"] =
263 MsgPackDoc.getNode(Name, /*Copy=*/true);
264
265 // For PAL version 3.6 and above, entry_point is no longer required.
266 if (getPALVersion() < VersionTuple(3, 6)) {
267 // Set .entry_point which is defined to be _amdgpu_<stage>_main and
268 // _amdgpu_cs_main for non-shader functions.
269 SmallString<16> EPName("_amdgpu_");
270 raw_svector_ostream EPNameOS(EPName);
271 EPNameOS << getStageName(CC) + 1 << "_main";
272 getHwStage(CC)[".entry_point"] =
273 MsgPackDoc.getNode(EPNameOS.str(), /*Copy=*/true);
274 }
275}
276
277// Set the number of used vgprs in the metadata. This is an optional
278// advisory record for logging etc; wave dispatch actually uses the rsrc1
279// register for the shader stage to determine the number of vgprs to
280// allocate.
282 if (isLegacy()) {
283 // Old non-msgpack format.
284 unsigned NumUsedVgprsKey = getScratchSizeKey(CC) +
287 setRegister(NumUsedVgprsKey, Val);
288 return;
289 }
290 // Msgpack format.
291 getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
292}
293
295 MCContext &Ctx) {
296 if (isLegacy()) {
297 // Old non-msgpack format.
298 unsigned NumUsedVgprsKey = getScratchSizeKey(CC) +
301 setRegister(NumUsedVgprsKey, Val, Ctx);
302 return;
303 }
304 // Msgpack format.
305 setHwStage(CC, ".vgpr_count", msgpack::Type::UInt, Val);
306}
307
308// Set the number of used agprs in the metadata.
310 getHwStage(CC)[".agpr_count"] = Val;
311}
312
313void AMDGPUPALMetadata::setNumUsedAgprs(unsigned CC, const MCExpr *Val) {
314 setHwStage(CC, ".agpr_count", msgpack::Type::UInt, Val);
315}
316
317// Set the number of used sgprs in the metadata. This is an optional advisory
318// record for logging etc; wave dispatch actually uses the rsrc1 register for
319// the shader stage to determine the number of sgprs to allocate.
321 if (isLegacy()) {
322 // Old non-msgpack format.
323 unsigned NumUsedSgprsKey = getScratchSizeKey(CC) +
326 setRegister(NumUsedSgprsKey, Val);
327 return;
328 }
329 // Msgpack format.
330 getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val);
331}
332
333void AMDGPUPALMetadata::setNumUsedSgprs(unsigned CC, const MCExpr *Val,
334 MCContext &Ctx) {
335 if (isLegacy()) {
336 // Old non-msgpack format.
337 unsigned NumUsedSgprsKey = getScratchSizeKey(CC) +
340 setRegister(NumUsedSgprsKey, Val, Ctx);
341 return;
342 }
343 // Msgpack format.
344 setHwStage(CC, ".sgpr_count", msgpack::Type::UInt, Val);
345}
346
347// Set the scratch size in the metadata.
349 if (isLegacy()) {
350 // Old non-msgpack format.
352 return;
353 }
354 // Msgpack format.
355 getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
356}
357
358void AMDGPUPALMetadata::setScratchSize(unsigned CC, const MCExpr *Val,
359 MCContext &Ctx) {
360 if (isLegacy()) {
361 // Old non-msgpack format.
362 setRegister(getScratchSizeKey(CC), Val, Ctx);
363 return;
364 }
365 // Msgpack format.
366 setHwStage(CC, ".scratch_memory_size", msgpack::Type::UInt, Val);
367}
368
369// Set the stack frame size of a function in the metadata.
371 auto Node = getShaderFunction(FnName);
372 Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
373 Node[".backend_stack_size"] = MsgPackDoc.getNode(Val);
374}
375
376// Set the amount of LDS used in bytes in the metadata.
378 auto Node = getShaderFunction(FnName);
379 Node[".lds_size"] = MsgPackDoc.getNode(Val);
380}
381
382// Set the number of used vgprs in the metadata.
384 unsigned Val) {
385 auto Node = getShaderFunction(FnName);
386 Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
387}
388
390 const MCExpr *Val) {
391 auto Node = getShaderFunction(FnName);
392 DelayedExprs.assignDocNode(Node[".vgpr_count"], msgpack::Type::UInt, Val);
393}
394
// Set the number of used sgprs in the metadata.
397 unsigned Val) {
398 auto Node = getShaderFunction(FnName);
399 Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
400}
401
403 const MCExpr *Val) {
404 auto Node = getShaderFunction(FnName);
405 DelayedExprs.assignDocNode(Node[".sgpr_count"], msgpack::Type::UInt, Val);
406}
407
408// Set the hardware register bit in PAL metadata to enable wave32 on the
409// shader of the given calling convention.
430
431// Convert a register number to name, for display by toString().
// Returns an empty StringRef if none.
433static StringRef getRegisterName(unsigned RegNum) {
434 // Table of registers.
435 constexpr EnumStringDef<uint16_t> RegInfoTableDefs[] = {
436 // Registers that code generation sets/modifies metadata for.
437 {{"SPI_SHADER_PGM_RSRC1_VS"}, PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS},
438 {{"SPI_SHADER_PGM_RSRC2_VS"}, PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS + 1},
439 {{"SPI_SHADER_PGM_RSRC1_LS"}, PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS},
440 {{"SPI_SHADER_PGM_RSRC2_LS"}, PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS + 1},
441 {{"SPI_SHADER_PGM_RSRC1_HS"}, PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS},
442 {{"SPI_SHADER_PGM_RSRC2_HS"}, PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS + 1},
443 {{"SPI_SHADER_PGM_RSRC1_ES"}, PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES},
444 {{"SPI_SHADER_PGM_RSRC2_ES"}, PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES + 1},
445 {{"SPI_SHADER_PGM_RSRC1_GS"}, PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS},
446 {{"SPI_SHADER_PGM_RSRC2_GS"}, PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS + 1},
447 {{"COMPUTE_DISPATCH_INITIATOR"},
449 {{"COMPUTE_PGM_RSRC1"}, PALMD::R_2E12_COMPUTE_PGM_RSRC1},
450 {{"COMPUTE_PGM_RSRC2"}, PALMD::R_2E12_COMPUTE_PGM_RSRC1 + 1},
451 {{"SPI_SHADER_PGM_RSRC1_PS"}, PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS},
452 {{"SPI_SHADER_PGM_RSRC2_PS"}, PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS + 1},
453 {{"SPI_PS_INPUT_ENA"}, PALMD::R_A1B3_SPI_PS_INPUT_ENA},
454 {{"SPI_PS_INPUT_ADDR"}, PALMD::R_A1B4_SPI_PS_INPUT_ADDR},
455 {{"SPI_PS_IN_CONTROL"}, PALMD::R_A1B6_SPI_PS_IN_CONTROL},
456 {{"VGT_SHADER_STAGES_EN"}, PALMD::R_A2D5_VGT_SHADER_STAGES_EN},
457
458 // Registers not known to code generation.
459 {{"SPI_SHADER_PGM_RSRC3_PS"}, 0x2c07},
460 {{"SPI_SHADER_PGM_RSRC3_VS"}, 0x2c46},
461 {{"SPI_SHADER_PGM_RSRC3_GS"}, 0x2c87},
462 {{"SPI_SHADER_PGM_RSRC3_ES"}, 0x2cc7},
463 {{"SPI_SHADER_PGM_RSRC3_HS"}, 0x2d07},
464 {{"SPI_SHADER_PGM_RSRC3_LS"}, 0x2d47},
465
466 {{"SPI_SHADER_POS_FORMAT"}, 0xa1c3},
467 {{"SPI_VS_OUT_CONFIG"}, 0xa1b1},
468 {{"PA_CL_VS_OUT_CNTL"}, 0xa207},
469 {{"PA_CL_CLIP_CNTL"}, 0xa204},
470 {{"PA_CL_VTE_CNTL"}, 0xa206},
471 {{"PA_SU_VTX_CNTL"}, 0xa2f9},
472 {{"PA_SC_MODE_CNTL_1"}, 0xa293},
473 {{"VGT_PRIMITIVEID_EN"}, 0xa2a1},
474 {{"SPI_SHADER_PGM_RSRC4_GS"}, 0x2c81},
475 {{"COMPUTE_TMPRING_SIZE"}, 0x2e18},
476 {{"SPI_INTERP_CONTROL_0"}, 0xa1b5},
477 {{"SPI_TMPRING_SIZE"}, 0xa1ba},
478 {{"SPI_SHADER_Z_FORMAT"}, 0xa1c4},
479 {{"SPI_SHADER_COL_FORMAT"}, 0xa1c5},
480 {{"DB_SHADER_CONTROL"}, 0xa203},
481 {{"CB_SHADER_MASK"}, 0xa08f},
482 {{"SPI_PS_INPUT_CNTL_0"}, 0xa191},
483 {{"SPI_PS_INPUT_CNTL_1"}, 0xa192},
484 {{"SPI_PS_INPUT_CNTL_2"}, 0xa193},
485 {{"SPI_PS_INPUT_CNTL_3"}, 0xa194},
486 {{"SPI_PS_INPUT_CNTL_4"}, 0xa195},
487 {{"SPI_PS_INPUT_CNTL_5"}, 0xa196},
488 {{"SPI_PS_INPUT_CNTL_6"}, 0xa197},
489 {{"SPI_PS_INPUT_CNTL_7"}, 0xa198},
490 {{"SPI_PS_INPUT_CNTL_8"}, 0xa199},
491 {{"SPI_PS_INPUT_CNTL_9"}, 0xa19a},
492 {{"SPI_PS_INPUT_CNTL_10"}, 0xa19b},
493 {{"SPI_PS_INPUT_CNTL_11"}, 0xa19c},
494 {{"SPI_PS_INPUT_CNTL_12"}, 0xa19d},
495 {{"SPI_PS_INPUT_CNTL_13"}, 0xa19e},
496 {{"SPI_PS_INPUT_CNTL_14"}, 0xa19f},
497 {{"SPI_PS_INPUT_CNTL_15"}, 0xa1a0},
498 {{"SPI_PS_INPUT_CNTL_16"}, 0xa1a1},
499 {{"SPI_PS_INPUT_CNTL_17"}, 0xa1a2},
500 {{"SPI_PS_INPUT_CNTL_18"}, 0xa1a3},
501 {{"SPI_PS_INPUT_CNTL_19"}, 0xa1a4},
502 {{"SPI_PS_INPUT_CNTL_20"}, 0xa1a5},
503 {{"SPI_PS_INPUT_CNTL_21"}, 0xa1a6},
504 {{"SPI_PS_INPUT_CNTL_22"}, 0xa1a7},
505 {{"SPI_PS_INPUT_CNTL_23"}, 0xa1a8},
506 {{"SPI_PS_INPUT_CNTL_24"}, 0xa1a9},
507 {{"SPI_PS_INPUT_CNTL_25"}, 0xa1aa},
508 {{"SPI_PS_INPUT_CNTL_26"}, 0xa1ab},
509 {{"SPI_PS_INPUT_CNTL_27"}, 0xa1ac},
510 {{"SPI_PS_INPUT_CNTL_28"}, 0xa1ad},
511 {{"SPI_PS_INPUT_CNTL_29"}, 0xa1ae},
512 {{"SPI_PS_INPUT_CNTL_30"}, 0xa1af},
513 {{"SPI_PS_INPUT_CNTL_31"}, 0xa1b0},
514
515 {{"VGT_GS_MAX_VERT_OUT"}, 0xa2ce},
516 {{"VGT_ESGS_RING_ITEMSIZE"}, 0xa2ab},
517 {{"VGT_GS_MODE"}, 0xa290},
518 {{"VGT_GS_ONCHIP_CNTL"}, 0xa291},
519 {{"VGT_GS_VERT_ITEMSIZE"}, 0xa2d7},
520 {{"VGT_GS_VERT_ITEMSIZE_1"}, 0xa2d8},
521 {{"VGT_GS_VERT_ITEMSIZE_2"}, 0xa2d9},
522 {{"VGT_GS_VERT_ITEMSIZE_3"}, 0xa2da},
523 {{"VGT_GSVS_RING_OFFSET_1"}, 0xa298},
524 {{"VGT_GSVS_RING_OFFSET_2"}, 0xa299},
525 {{"VGT_GSVS_RING_OFFSET_3"}, 0xa29a},
526
527 {{"VGT_GS_INSTANCE_CNT"}, 0xa2e4},
528 {{"VGT_GS_PER_VS"}, 0xa297},
529 {{"VGT_GS_OUT_PRIM_TYPE"}, 0xa29b},
530 {{"VGT_GSVS_RING_ITEMSIZE"}, 0xa2ac},
531
532 {{"VGT_REUSE_OFF"}, 0xa2ad},
533 {{"SPI_BARYC_CNTL"}, 0xa1b8},
534
535 {{"SPI_SHADER_USER_DATA_VS_0"}, 0x2c4c},
536 {{"SPI_SHADER_USER_DATA_VS_1"}, 0x2c4d},
537 {{"SPI_SHADER_USER_DATA_VS_2"}, 0x2c4e},
538 {{"SPI_SHADER_USER_DATA_VS_3"}, 0x2c4f},
539 {{"SPI_SHADER_USER_DATA_VS_4"}, 0x2c50},
540 {{"SPI_SHADER_USER_DATA_VS_5"}, 0x2c51},
541 {{"SPI_SHADER_USER_DATA_VS_6"}, 0x2c52},
542 {{"SPI_SHADER_USER_DATA_VS_7"}, 0x2c53},
543 {{"SPI_SHADER_USER_DATA_VS_8"}, 0x2c54},
544 {{"SPI_SHADER_USER_DATA_VS_9"}, 0x2c55},
545 {{"SPI_SHADER_USER_DATA_VS_10"}, 0x2c56},
546 {{"SPI_SHADER_USER_DATA_VS_11"}, 0x2c57},
547 {{"SPI_SHADER_USER_DATA_VS_12"}, 0x2c58},
548 {{"SPI_SHADER_USER_DATA_VS_13"}, 0x2c59},
549 {{"SPI_SHADER_USER_DATA_VS_14"}, 0x2c5a},
550 {{"SPI_SHADER_USER_DATA_VS_15"}, 0x2c5b},
551 {{"SPI_SHADER_USER_DATA_VS_16"}, 0x2c5c},
552 {{"SPI_SHADER_USER_DATA_VS_17"}, 0x2c5d},
553 {{"SPI_SHADER_USER_DATA_VS_18"}, 0x2c5e},
554 {{"SPI_SHADER_USER_DATA_VS_19"}, 0x2c5f},
555 {{"SPI_SHADER_USER_DATA_VS_20"}, 0x2c60},
556 {{"SPI_SHADER_USER_DATA_VS_21"}, 0x2c61},
557 {{"SPI_SHADER_USER_DATA_VS_22"}, 0x2c62},
558 {{"SPI_SHADER_USER_DATA_VS_23"}, 0x2c63},
559 {{"SPI_SHADER_USER_DATA_VS_24"}, 0x2c64},
560 {{"SPI_SHADER_USER_DATA_VS_25"}, 0x2c65},
561 {{"SPI_SHADER_USER_DATA_VS_26"}, 0x2c66},
562 {{"SPI_SHADER_USER_DATA_VS_27"}, 0x2c67},
563 {{"SPI_SHADER_USER_DATA_VS_28"}, 0x2c68},
564 {{"SPI_SHADER_USER_DATA_VS_29"}, 0x2c69},
565 {{"SPI_SHADER_USER_DATA_VS_30"}, 0x2c6a},
566 {{"SPI_SHADER_USER_DATA_VS_31"}, 0x2c6b},
567
568 {{"SPI_SHADER_USER_DATA_GS_0"}, 0x2c8c},
569 {{"SPI_SHADER_USER_DATA_GS_1"}, 0x2c8d},
570 {{"SPI_SHADER_USER_DATA_GS_2"}, 0x2c8e},
571 {{"SPI_SHADER_USER_DATA_GS_3"}, 0x2c8f},
572 {{"SPI_SHADER_USER_DATA_GS_4"}, 0x2c90},
573 {{"SPI_SHADER_USER_DATA_GS_5"}, 0x2c91},
574 {{"SPI_SHADER_USER_DATA_GS_6"}, 0x2c92},
575 {{"SPI_SHADER_USER_DATA_GS_7"}, 0x2c93},
576 {{"SPI_SHADER_USER_DATA_GS_8"}, 0x2c94},
577 {{"SPI_SHADER_USER_DATA_GS_9"}, 0x2c95},
578 {{"SPI_SHADER_USER_DATA_GS_10"}, 0x2c96},
579 {{"SPI_SHADER_USER_DATA_GS_11"}, 0x2c97},
580 {{"SPI_SHADER_USER_DATA_GS_12"}, 0x2c98},
581 {{"SPI_SHADER_USER_DATA_GS_13"}, 0x2c99},
582 {{"SPI_SHADER_USER_DATA_GS_14"}, 0x2c9a},
583 {{"SPI_SHADER_USER_DATA_GS_15"}, 0x2c9b},
584 {{"SPI_SHADER_USER_DATA_GS_16"}, 0x2c9c},
585 {{"SPI_SHADER_USER_DATA_GS_17"}, 0x2c9d},
586 {{"SPI_SHADER_USER_DATA_GS_18"}, 0x2c9e},
587 {{"SPI_SHADER_USER_DATA_GS_19"}, 0x2c9f},
588 {{"SPI_SHADER_USER_DATA_GS_20"}, 0x2ca0},
589 {{"SPI_SHADER_USER_DATA_GS_21"}, 0x2ca1},
590 {{"SPI_SHADER_USER_DATA_GS_22"}, 0x2ca2},
591 {{"SPI_SHADER_USER_DATA_GS_23"}, 0x2ca3},
592 {{"SPI_SHADER_USER_DATA_GS_24"}, 0x2ca4},
593 {{"SPI_SHADER_USER_DATA_GS_25"}, 0x2ca5},
594 {{"SPI_SHADER_USER_DATA_GS_26"}, 0x2ca6},
595 {{"SPI_SHADER_USER_DATA_GS_27"}, 0x2ca7},
596 {{"SPI_SHADER_USER_DATA_GS_28"}, 0x2ca8},
597 {{"SPI_SHADER_USER_DATA_GS_29"}, 0x2ca9},
598 {{"SPI_SHADER_USER_DATA_GS_30"}, 0x2caa},
599 {{"SPI_SHADER_USER_DATA_GS_31"}, 0x2cab},
600
601 {{"SPI_SHADER_USER_DATA_ES_0"}, 0x2ccc},
602 {{"SPI_SHADER_USER_DATA_ES_1"}, 0x2ccd},
603 {{"SPI_SHADER_USER_DATA_ES_2"}, 0x2cce},
604 {{"SPI_SHADER_USER_DATA_ES_3"}, 0x2ccf},
605 {{"SPI_SHADER_USER_DATA_ES_4"}, 0x2cd0},
606 {{"SPI_SHADER_USER_DATA_ES_5"}, 0x2cd1},
607 {{"SPI_SHADER_USER_DATA_ES_6"}, 0x2cd2},
608 {{"SPI_SHADER_USER_DATA_ES_7"}, 0x2cd3},
609 {{"SPI_SHADER_USER_DATA_ES_8"}, 0x2cd4},
610 {{"SPI_SHADER_USER_DATA_ES_9"}, 0x2cd5},
611 {{"SPI_SHADER_USER_DATA_ES_10"}, 0x2cd6},
612 {{"SPI_SHADER_USER_DATA_ES_11"}, 0x2cd7},
613 {{"SPI_SHADER_USER_DATA_ES_12"}, 0x2cd8},
614 {{"SPI_SHADER_USER_DATA_ES_13"}, 0x2cd9},
615 {{"SPI_SHADER_USER_DATA_ES_14"}, 0x2cda},
616 {{"SPI_SHADER_USER_DATA_ES_15"}, 0x2cdb},
617 {{"SPI_SHADER_USER_DATA_ES_16"}, 0x2cdc},
618 {{"SPI_SHADER_USER_DATA_ES_17"}, 0x2cdd},
619 {{"SPI_SHADER_USER_DATA_ES_18"}, 0x2cde},
620 {{"SPI_SHADER_USER_DATA_ES_19"}, 0x2cdf},
621 {{"SPI_SHADER_USER_DATA_ES_20"}, 0x2ce0},
622 {{"SPI_SHADER_USER_DATA_ES_21"}, 0x2ce1},
623 {{"SPI_SHADER_USER_DATA_ES_22"}, 0x2ce2},
624 {{"SPI_SHADER_USER_DATA_ES_23"}, 0x2ce3},
625 {{"SPI_SHADER_USER_DATA_ES_24"}, 0x2ce4},
626 {{"SPI_SHADER_USER_DATA_ES_25"}, 0x2ce5},
627 {{"SPI_SHADER_USER_DATA_ES_26"}, 0x2ce6},
628 {{"SPI_SHADER_USER_DATA_ES_27"}, 0x2ce7},
629 {{"SPI_SHADER_USER_DATA_ES_28"}, 0x2ce8},
630 {{"SPI_SHADER_USER_DATA_ES_29"}, 0x2ce9},
631 {{"SPI_SHADER_USER_DATA_ES_30"}, 0x2cea},
632 {{"SPI_SHADER_USER_DATA_ES_31"}, 0x2ceb},
633
634 {{"SPI_SHADER_USER_DATA_PS_0"}, 0x2c0c},
635 {{"SPI_SHADER_USER_DATA_PS_1"}, 0x2c0d},
636 {{"SPI_SHADER_USER_DATA_PS_2"}, 0x2c0e},
637 {{"SPI_SHADER_USER_DATA_PS_3"}, 0x2c0f},
638 {{"SPI_SHADER_USER_DATA_PS_4"}, 0x2c10},
639 {{"SPI_SHADER_USER_DATA_PS_5"}, 0x2c11},
640 {{"SPI_SHADER_USER_DATA_PS_6"}, 0x2c12},
641 {{"SPI_SHADER_USER_DATA_PS_7"}, 0x2c13},
642 {{"SPI_SHADER_USER_DATA_PS_8"}, 0x2c14},
643 {{"SPI_SHADER_USER_DATA_PS_9"}, 0x2c15},
644 {{"SPI_SHADER_USER_DATA_PS_10"}, 0x2c16},
645 {{"SPI_SHADER_USER_DATA_PS_11"}, 0x2c17},
646 {{"SPI_SHADER_USER_DATA_PS_12"}, 0x2c18},
647 {{"SPI_SHADER_USER_DATA_PS_13"}, 0x2c19},
648 {{"SPI_SHADER_USER_DATA_PS_14"}, 0x2c1a},
649 {{"SPI_SHADER_USER_DATA_PS_15"}, 0x2c1b},
650 {{"SPI_SHADER_USER_DATA_PS_16"}, 0x2c1c},
651 {{"SPI_SHADER_USER_DATA_PS_17"}, 0x2c1d},
652 {{"SPI_SHADER_USER_DATA_PS_18"}, 0x2c1e},
653 {{"SPI_SHADER_USER_DATA_PS_19"}, 0x2c1f},
654 {{"SPI_SHADER_USER_DATA_PS_20"}, 0x2c20},
655 {{"SPI_SHADER_USER_DATA_PS_21"}, 0x2c21},
656 {{"SPI_SHADER_USER_DATA_PS_22"}, 0x2c22},
657 {{"SPI_SHADER_USER_DATA_PS_23"}, 0x2c23},
658 {{"SPI_SHADER_USER_DATA_PS_24"}, 0x2c24},
659 {{"SPI_SHADER_USER_DATA_PS_25"}, 0x2c25},
660 {{"SPI_SHADER_USER_DATA_PS_26"}, 0x2c26},
661 {{"SPI_SHADER_USER_DATA_PS_27"}, 0x2c27},
662 {{"SPI_SHADER_USER_DATA_PS_28"}, 0x2c28},
663 {{"SPI_SHADER_USER_DATA_PS_29"}, 0x2c29},
664 {{"SPI_SHADER_USER_DATA_PS_30"}, 0x2c2a},
665 {{"SPI_SHADER_USER_DATA_PS_31"}, 0x2c2b},
666
667 {{"COMPUTE_USER_DATA_0"}, 0x2e40},
668 {{"COMPUTE_USER_DATA_1"}, 0x2e41},
669 {{"COMPUTE_USER_DATA_2"}, 0x2e42},
670 {{"COMPUTE_USER_DATA_3"}, 0x2e43},
671 {{"COMPUTE_USER_DATA_4"}, 0x2e44},
672 {{"COMPUTE_USER_DATA_5"}, 0x2e45},
673 {{"COMPUTE_USER_DATA_6"}, 0x2e46},
674 {{"COMPUTE_USER_DATA_7"}, 0x2e47},
675 {{"COMPUTE_USER_DATA_8"}, 0x2e48},
676 {{"COMPUTE_USER_DATA_9"}, 0x2e49},
677 {{"COMPUTE_USER_DATA_10"}, 0x2e4a},
678 {{"COMPUTE_USER_DATA_11"}, 0x2e4b},
679 {{"COMPUTE_USER_DATA_12"}, 0x2e4c},
680 {{"COMPUTE_USER_DATA_13"}, 0x2e4d},
681 {{"COMPUTE_USER_DATA_14"}, 0x2e4e},
682 {{"COMPUTE_USER_DATA_15"}, 0x2e4f},
683 {{"COMPUTE_USER_DATA_16"}, 0x2e50},
684 {{"COMPUTE_USER_DATA_17"}, 0x2e51},
685 {{"COMPUTE_USER_DATA_18"}, 0x2e52},
686 {{"COMPUTE_USER_DATA_19"}, 0x2e53},
687 {{"COMPUTE_USER_DATA_20"}, 0x2e54},
688 {{"COMPUTE_USER_DATA_21"}, 0x2e55},
689 {{"COMPUTE_USER_DATA_22"}, 0x2e56},
690 {{"COMPUTE_USER_DATA_23"}, 0x2e57},
691 {{"COMPUTE_USER_DATA_24"}, 0x2e58},
692 {{"COMPUTE_USER_DATA_25"}, 0x2e59},
693 {{"COMPUTE_USER_DATA_26"}, 0x2e5a},
694 {{"COMPUTE_USER_DATA_27"}, 0x2e5b},
695 {{"COMPUTE_USER_DATA_28"}, 0x2e5c},
696 {{"COMPUTE_USER_DATA_29"}, 0x2e5d},
697 {{"COMPUTE_USER_DATA_30"}, 0x2e5e},
698 {{"COMPUTE_USER_DATA_31"}, 0x2e5f},
699
700 {{"COMPUTE_NUM_THREAD_X"}, 0x2e07},
701 {{"COMPUTE_NUM_THREAD_Y"}, 0x2e08},
702 {{"COMPUTE_NUM_THREAD_Z"}, 0x2e09},
703 {{"VGT_TF_PARAM"}, 0xa2db},
704 {{"VGT_LS_HS_CONFIG"}, 0xa2d6},
705 {{"VGT_HOS_MIN_TESS_LEVEL"}, 0xa287},
706 {{"VGT_HOS_MAX_TESS_LEVEL"}, 0xa286},
707 {{"PA_SC_AA_CONFIG"}, 0xa2f8},
708 {{"PA_SC_SHADER_CONTROL"}, 0xa310},
709 {{"PA_SC_CONSERVATIVE_RASTERIZATION_CNTL"}, 0xa313},
710
711 {{"SPI_SHADER_USER_DATA_HS_0"}, 0x2d0c},
712 {{"SPI_SHADER_USER_DATA_HS_1"}, 0x2d0d},
713 {{"SPI_SHADER_USER_DATA_HS_2"}, 0x2d0e},
714 {{"SPI_SHADER_USER_DATA_HS_3"}, 0x2d0f},
715 {{"SPI_SHADER_USER_DATA_HS_4"}, 0x2d10},
716 {{"SPI_SHADER_USER_DATA_HS_5"}, 0x2d11},
717 {{"SPI_SHADER_USER_DATA_HS_6"}, 0x2d12},
718 {{"SPI_SHADER_USER_DATA_HS_7"}, 0x2d13},
719 {{"SPI_SHADER_USER_DATA_HS_8"}, 0x2d14},
720 {{"SPI_SHADER_USER_DATA_HS_9"}, 0x2d15},
721 {{"SPI_SHADER_USER_DATA_HS_10"}, 0x2d16},
722 {{"SPI_SHADER_USER_DATA_HS_11"}, 0x2d17},
723 {{"SPI_SHADER_USER_DATA_HS_12"}, 0x2d18},
724 {{"SPI_SHADER_USER_DATA_HS_13"}, 0x2d19},
725 {{"SPI_SHADER_USER_DATA_HS_14"}, 0x2d1a},
726 {{"SPI_SHADER_USER_DATA_HS_15"}, 0x2d1b},
727 {{"SPI_SHADER_USER_DATA_HS_16"}, 0x2d1c},
728 {{"SPI_SHADER_USER_DATA_HS_17"}, 0x2d1d},
729 {{"SPI_SHADER_USER_DATA_HS_18"}, 0x2d1e},
730 {{"SPI_SHADER_USER_DATA_HS_19"}, 0x2d1f},
731 {{"SPI_SHADER_USER_DATA_HS_20"}, 0x2d20},
732 {{"SPI_SHADER_USER_DATA_HS_21"}, 0x2d21},
733 {{"SPI_SHADER_USER_DATA_HS_22"}, 0x2d22},
734 {{"SPI_SHADER_USER_DATA_HS_23"}, 0x2d23},
735 {{"SPI_SHADER_USER_DATA_HS_24"}, 0x2d24},
736 {{"SPI_SHADER_USER_DATA_HS_25"}, 0x2d25},
737 {{"SPI_SHADER_USER_DATA_HS_26"}, 0x2d26},
738 {{"SPI_SHADER_USER_DATA_HS_27"}, 0x2d27},
739 {{"SPI_SHADER_USER_DATA_HS_28"}, 0x2d28},
740 {{"SPI_SHADER_USER_DATA_HS_29"}, 0x2d29},
741 {{"SPI_SHADER_USER_DATA_HS_30"}, 0x2d2a},
742 {{"SPI_SHADER_USER_DATA_HS_31"}, 0x2d2b},
743
744 {{"SPI_SHADER_USER_DATA_LS_0"}, 0x2d4c},
745 {{"SPI_SHADER_USER_DATA_LS_1"}, 0x2d4d},
746 {{"SPI_SHADER_USER_DATA_LS_2"}, 0x2d4e},
747 {{"SPI_SHADER_USER_DATA_LS_3"}, 0x2d4f},
748 {{"SPI_SHADER_USER_DATA_LS_4"}, 0x2d50},
749 {{"SPI_SHADER_USER_DATA_LS_5"}, 0x2d51},
750 {{"SPI_SHADER_USER_DATA_LS_6"}, 0x2d52},
751 {{"SPI_SHADER_USER_DATA_LS_7"}, 0x2d53},
752 {{"SPI_SHADER_USER_DATA_LS_8"}, 0x2d54},
753 {{"SPI_SHADER_USER_DATA_LS_9"}, 0x2d55},
754 {{"SPI_SHADER_USER_DATA_LS_10"}, 0x2d56},
755 {{"SPI_SHADER_USER_DATA_LS_11"}, 0x2d57},
756 {{"SPI_SHADER_USER_DATA_LS_12"}, 0x2d58},
757 {{"SPI_SHADER_USER_DATA_LS_13"}, 0x2d59},
758 {{"SPI_SHADER_USER_DATA_LS_14"}, 0x2d5a},
759 {{"SPI_SHADER_USER_DATA_LS_15"}, 0x2d5b},
760
761 {{"IA_MULTI_VGT_PARAM"}, 0xa2aa},
762 {{"VGT_GS_MAX_PRIMS_PER_SUBGROUP"}, 0xa2a5},
763 {{"VGT_STRMOUT_BUFFER_CONFIG"}, 0xa2e6},
764 {{"VGT_STRMOUT_CONFIG"}, 0xa2e5},
765 {{"VGT_STRMOUT_VTX_STRIDE_0"}, 0xa2b5},
766 {{"VGT_STRMOUT_VTX_STRIDE_1"}, 0xa2b9},
767 {{"VGT_STRMOUT_VTX_STRIDE_2"}, 0xa2bd},
768 {{"VGT_STRMOUT_VTX_STRIDE_3"}, 0xa2c1},
769 {{"VGT_VERTEX_REUSE_BLOCK_CNTL"}, 0xa316},
770
771 {{"COMPUTE_PGM_RSRC3"}, 0x2e28},
772 {{"COMPUTE_SHADER_CHKSUM"}, 0x2e2a},
773 {{"COMPUTE_USER_ACCUM_0"}, 0x2e24},
774 {{"COMPUTE_USER_ACCUM_1"}, 0x2e25},
775 {{"COMPUTE_USER_ACCUM_2"}, 0x2e26},
776 {{"COMPUTE_USER_ACCUM_3"}, 0x2e27},
777 {{"GE_MAX_OUTPUT_PER_SUBGROUP"}, 0xa1ff},
778 {{"GE_NGG_SUBGRP_CNTL"}, 0xa2d3},
779 {{"GE_STEREO_CNTL"}, 0xc25f},
780 {{"GE_USER_VGPR_EN"}, 0xc262},
781 {{"IA_MULTI_VGT_PARAM_PIPED"}, 0xc258},
782 {{"PA_STEREO_CNTL"}, 0xa210},
783 {{"SPI_SHADER_IDX_FORMAT"}, 0xa1c2},
784 {{"SPI_SHADER_PGM_CHKSUM_GS"}, 0x2c80},
785 {{"SPI_SHADER_PGM_CHKSUM_HS"}, 0x2d00},
786 {{"SPI_SHADER_PGM_CHKSUM_PS"}, 0x2c06},
787 {{"SPI_SHADER_PGM_CHKSUM_VS"}, 0x2c45},
788 {{"SPI_SHADER_PGM_LO_GS"}, 0x2c88},
789 {{"SPI_SHADER_USER_ACCUM_ESGS_0"}, 0x2cb2},
790 {{"SPI_SHADER_USER_ACCUM_ESGS_1"}, 0x2cb3},
791 {{"SPI_SHADER_USER_ACCUM_ESGS_2"}, 0x2cb4},
792 {{"SPI_SHADER_USER_ACCUM_ESGS_3"}, 0x2cb5},
793 {{"SPI_SHADER_USER_ACCUM_LSHS_0"}, 0x2d32},
794 {{"SPI_SHADER_USER_ACCUM_LSHS_1"}, 0x2d33},
795 {{"SPI_SHADER_USER_ACCUM_LSHS_2"}, 0x2d34},
796 {{"SPI_SHADER_USER_ACCUM_LSHS_3"}, 0x2d35},
797 {{"SPI_SHADER_USER_ACCUM_PS_0"}, 0x2c32},
798 {{"SPI_SHADER_USER_ACCUM_PS_1"}, 0x2c33},
799 {{"SPI_SHADER_USER_ACCUM_PS_2"}, 0x2c34},
800 {{"SPI_SHADER_USER_ACCUM_PS_3"}, 0x2c35},
801 {{"SPI_SHADER_USER_ACCUM_VS_0"}, 0x2c72},
802 {{"SPI_SHADER_USER_ACCUM_VS_1"}, 0x2c73},
803 {{"SPI_SHADER_USER_ACCUM_VS_2"}, 0x2c74},
804 {{"SPI_SHADER_USER_ACCUM_VS_3"}, 0x2c75},
805 };
806 static constexpr auto RegInfoTable = BUILD_ENUM_STRINGS(RegInfoTableDefs);
807 return EnumStrings(RegInfoTable).toString(RegNum);
808}
809
810// Convert the accumulated PAL metadata into an asm directive.
812 String.clear();
813 if (!BlobType)
814 return;
815 ResolvedAll = DelayedExprs.resolveDelayedExpressions();
817 if (isLegacy()) {
818 if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil)
819 return;
820 // Old linear reg=val format.
821 Stream << '\t' << AMDGPU::PALMD::AssemblerDirective << ' ';
822 auto Regs = getRegisters();
823 for (auto I = Regs.begin(), E = Regs.end(); I != E; ++I) {
824 if (I != Regs.begin())
825 Stream << ',';
826 unsigned Reg = I->first.getUInt();
827 unsigned Val = I->second.getUInt();
828 Stream << "0x" << Twine::utohexstr(Reg) << ",0x" << Twine::utohexstr(Val);
829 }
830 Stream << '\n';
831 return;
832 }
833
834 // New msgpack-based format -- output as YAML (with unsigned numbers in hex),
835 // but first change the registers map to use names.
836 MsgPackDoc.setHexMode();
837 auto &RegsObj = refRegisters();
838 auto OrigRegs = RegsObj.getMap();
839 RegsObj = MsgPackDoc.getMapNode();
840 for (auto I : OrigRegs) {
841 auto Key = I.first;
842 if (StringRef RegName = getRegisterName(Key.getUInt()); !RegName.empty()) {
843 std::string KeyName = Key.toString();
844 KeyName += " (";
845 KeyName += RegName;
846 KeyName += ')';
847 Key = MsgPackDoc.getNode(KeyName, /*Copy=*/true);
848 }
849 RegsObj.getMap()[Key] = I.second;
850 }
851
852 // Output as YAML.
853 Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveBegin << '\n';
854 MsgPackDoc.toYAML(Stream);
855 Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveEnd << '\n';
856
857 // Restore original registers map.
858 RegsObj = OrigRegs;
859}
860
861// Convert the accumulated PAL metadata into a binary blob for writing as
862// a .note record of the specified AMD type. Returns an empty blob if
// there is no PAL metadata.
864void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
865 ResolvedAll = DelayedExprs.resolveDelayedExpressions();
867 toLegacyBlob(Blob);
868 else if (Type)
869 toMsgPackBlob(Blob);
870}
871
872void AMDGPUPALMetadata::toLegacyBlob(std::string &Blob) {
873 Blob.clear();
874 auto Registers = getRegisters();
875 if (Registers.getMap().empty())
876 return;
877 raw_string_ostream OS(Blob);
879 for (auto I : Registers.getMap()) {
880 EW.write(uint32_t(I.first.getUInt()));
881 EW.write(uint32_t(I.second.getUInt()));
882 }
883}
884
885void AMDGPUPALMetadata::toMsgPackBlob(std::string &Blob) {
886 Blob.clear();
887 MsgPackDoc.writeToBlob(Blob);
888}
889
890// Set PAL metadata from YAML text. Returns false if failed.
892 BlobType = ELF::NT_AMDGPU_METADATA;
893 if (!MsgPackDoc.fromYAML(S))
894 return false;
895
896 // In the registers map, some keys may be of the form "0xa191
897 // (SPI_PS_INPUT_CNTL_0)", in which case the YAML input code made it a
898 // string. We need to turn it into a number.
899 auto &RegsObj = refRegisters();
900 auto OrigRegs = RegsObj;
901 RegsObj = MsgPackDoc.getMapNode();
902 Registers = RegsObj.getMap();
903 bool Ok = true;
904 for (auto I : OrigRegs.getMap()) {
905 auto Key = I.first;
906 if (Key.getKind() == msgpack::Type::String) {
907 StringRef S = Key.getString();
908 uint64_t Val;
909 if (S.consumeInteger(0, Val)) {
910 Ok = false;
911 errs() << "Unrecognized PAL metadata register key '" << S << "'\n";
912 continue;
913 }
914 Key = MsgPackDoc.getNode(Val);
915 }
916 Registers.getMap()[Key] = I.second;
917 }
918 return Ok;
919}
920
921// Reference (create if necessary) the node for the registers map.
922msgpack::DocNode &AMDGPUPALMetadata::refRegisters() {
923 auto &N =
924 MsgPackDoc.getRoot()
925 .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
926 .getArray(/*Convert=*/true)[0]
927 .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".registers")];
928 N.getMap(/*Convert=*/true);
929 return N;
930}
931
932// Get (create if necessary) the registers map.
933msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
934 if (Registers.isEmpty())
935 Registers = refRegisters();
936 return Registers.getMap();
937}
938
939// Reference (create if necessary) the node for the shader functions map.
940msgpack::DocNode &AMDGPUPALMetadata::refShaderFunctions() {
941 auto &N =
942 MsgPackDoc.getRoot()
943 .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
944 .getArray(/*Convert=*/true)[0]
945 .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".shader_functions")];
946 N.getMap(/*Convert=*/true);
947 return N;
948}
949
950// Get (create if necessary) the shader functions map.
951msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
952 if (ShaderFunctions.isEmpty())
953 ShaderFunctions = refShaderFunctions();
954 return ShaderFunctions.getMap();
955}
956
957// Get (create if necessary) a function in the shader functions map.
958msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) {
959 auto Functions = getShaderFunctions();
960 return Functions[Name].getMap(/*Convert=*/true);
961}
962
963msgpack::DocNode &AMDGPUPALMetadata::refComputeRegisters() {
964 auto &N =
965 MsgPackDoc.getRoot()
966 .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
967 .getArray(/*Convert=*/true)[0]
968 .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".compute_registers")];
969 N.getMap(/*Convert=*/true);
970 return N;
971}
972
973msgpack::MapDocNode AMDGPUPALMetadata::getComputeRegisters() {
974 if (ComputeRegisters.isEmpty())
975 ComputeRegisters = refComputeRegisters();
976 return ComputeRegisters.getMap();
977}
978
979msgpack::DocNode &AMDGPUPALMetadata::refGraphicsRegisters() {
980 auto &N =
981 MsgPackDoc.getRoot()
982 .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
983 .getArray(/*Convert=*/true)[0]
984 .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".graphics_registers")];
985 N.getMap(/*Convert=*/true);
986 return N;
987}
988
989msgpack::MapDocNode AMDGPUPALMetadata::getGraphicsRegisters() {
990 if (GraphicsRegisters.isEmpty())
991 GraphicsRegisters = refGraphicsRegisters();
992 return GraphicsRegisters.getMap();
993}
994
995msgpack::DocNode &AMDGPUPALMetadata::refHwStage() {
996 auto &N =
997 MsgPackDoc.getRoot()
998 .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
999 .getArray(/*Convert=*/true)[0]
1000 .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".hardware_stages")];
1001 N.getMap(/*Convert=*/true);
1002 return N;
1003}
1004
1005// Get (create if necessary) the .hardware_stages entry for the given calling
1006// convention.
1007msgpack::MapDocNode AMDGPUPALMetadata::getHwStage(unsigned CC) {
1008 if (HwStages.isEmpty())
1009 HwStages = refHwStage();
1010 return HwStages.getMap()[getStageName(CC)].getMap(/*Convert=*/true);
1011}
1012
1013// Get .note record vendor name of metadata blob to be emitted.
1014const char *AMDGPUPALMetadata::getVendor() const {
1015 return isLegacy() ? ElfNote::NoteNameV2 : ElfNote::NoteNameV3;
1016}
1017
1018// Get .note record type of metadata blob to be emitted:
1019// ELF::NT_AMD_PAL_METADATA (legacy key=val format), or
1020// ELF::NT_AMDGPU_METADATA (MsgPack format), or
1021// 0 (no PAL metadata).
1023 return BlobType;
1024}
1025
1026// Return whether the blob type is legacy PAL metadata.
1027bool AMDGPUPALMetadata::isLegacy() const {
1028 return BlobType == ELF::NT_AMD_PAL_METADATA;
1029}
1030
1031// Set legacy PAL metadata format.
1035
1036// Erase all PAL metadata.
1038 MsgPackDoc.clear();
1039 REM.clear();
1040 DelayedExprs.clear();
1041 Registers = MsgPackDoc.getEmptyNode();
1042 HwStages = MsgPackDoc.getEmptyNode();
1043 ShaderFunctions = MsgPackDoc.getEmptyNode();
1044}
1045
1047 return ResolvedAll && DelayedExprs.empty();
1048}
1049
1050unsigned AMDGPUPALMetadata::getPALVersion(unsigned idx) {
1051 assert(idx < 2 &&
1052 "illegal index to PAL version - should be 0 (major) or 1 (minor)");
1053 if (!VersionChecked) {
1054 if (Version.isEmpty()) {
1055 auto &M = MsgPackDoc.getRoot().getMap(/*Convert=*/true);
1056 auto I = M.find(MsgPackDoc.getNode("amdpal.version"));
1057 if (I != M.end())
1058 Version = I->second;
1059 }
1060 VersionChecked = true;
1061 }
1062 if (Version.isEmpty())
1063 // Default to 2.6 if there's no version info
1064 return idx ? 6 : 2;
1065 return Version.getArray()[idx].getUInt();
1066}
1067
1069
1071
1075
1076// Set the field in a given .hardware_stages entry to a maximum value
1078 unsigned Val) {
1079 msgpack::MapDocNode HwStageFieldMapNode = getHwStage(CC);
1080 auto &Node = HwStageFieldMapNode[field];
1081 if (Node.isEmpty())
1082 Node = Val;
1083 else
1084 Node = std::max<unsigned>(Node.getUInt(), Val);
1085}
1086
1087// Set the field in a given .hardware_stages entry
1088void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, unsigned Val) {
1089 getHwStage(CC)[field] = Val;
1090}
1091
1092void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, bool Val) {
1093 getHwStage(CC)[field] = Val;
1094}
1095
1097 msgpack::Type Type, const MCExpr *Val) {
1098 DelayedExprs.assignDocNode(getHwStage(CC)[field], Type, Val);
1099}
1100
1102 getComputeRegisters()[field] = Val;
1103}
1104
1106 getComputeRegisters()[field] = Val;
1107}
1108
1110 auto M = getComputeRegisters();
1111 auto I = M.find(field);
1112 return I == M.end() ? nullptr : &I->second;
1113}
1114
1116 if (auto *N = refComputeRegister(field))
1117 return N->getUInt() == Val;
1118 return false;
1119}
1120
1122 if (auto *N = refComputeRegister(field))
1123 return N->getBool() == Val;
1124 return false;
1125}
1126
1128 getGraphicsRegisters()[field] = Val;
1129}
1130
1132 getGraphicsRegisters()[field] = Val;
1133}
1134
1136 unsigned Val) {
1137 getGraphicsRegisters()[field1].getMap(true)[field2] = Val;
1138}
1139
1141 bool Val) {
1142 getGraphicsRegisters()[field1].getMap(true)[field2] = Val;
1143}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU metadata definitions and in-memory representations.
static unsigned getScratchSizeKey(CallingConv::ID CC)
static unsigned getRsrc1Reg(CallingConv::ID CC)
static const char * getStageName(CallingConv::ID CC)
PAL metadata handling.
Enums and constants for AMDGPU PT_NOTE sections.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define BUILD_ENUM_STRINGS(Tab)
Definition Enum.h:120
Module.h This file contains the declarations for the Module class.
#define RegName(no)
#define I(x, y, z)
Definition MD5.cpp:57
static std::string getRegisterName(const TargetRegisterInfo *TRI, Register Reg)
#define S_0286D8_PS_W32_EN(x)
Definition SIDefines.h:1513
#define S_00B800_CS_W32_EN(x)
Definition SIDefines.h:1515
#define S_028B54_GS_W32_EN(x)
Definition SIDefines.h:1510
#define S_028B54_VS_W32_EN(x)
Definition SIDefines.h:1511
#define S_028B54_HS_W32_EN(x)
Definition SIDefines.h:1509
SI Pre allocate WWM Registers
Defines the llvm::VersionTuple class, which represents a version in the form major[....
void setSpiPsInputAddr(unsigned Val)
void setEntryPoint(unsigned CC, StringRef Name)
const char * getVendor() const
void setFunctionScratchSize(StringRef FnName, unsigned Val)
bool setFromString(StringRef S)
void setNumUsedVgprs(unsigned CC, unsigned Val)
unsigned getRegister(unsigned Reg)
msgpack::DocNode * refComputeRegister(StringRef field)
void setFunctionNumUsedVgprs(StringRef FnName, unsigned Val)
bool setFromBlob(unsigned Type, StringRef Blob)
void setFunctionNumUsedSgprs(StringRef FnName, unsigned Val)
void setScratchSize(unsigned CC, unsigned Val)
void setRegister(unsigned Reg, unsigned Val)
void setHwStage(unsigned CC, StringRef field, unsigned Val)
void setRsrc1(unsigned CC, unsigned Val)
void setSpiPsInputEna(unsigned Val)
void setNumUsedAgprs(unsigned CC, unsigned Val)
void setGraphicsRegisters(StringRef field, unsigned Val)
bool checkComputeRegisters(StringRef field, unsigned Val)
void toBlob(unsigned Type, std::string &S)
void toString(std::string &S)
void setFunctionLdsSize(StringRef FnName, unsigned Val)
void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val)
void setRsrc2(unsigned CC, unsigned Val)
void setNumUsedSgprs(unsigned CC, unsigned Val)
void setComputeRegisters(StringRef field, unsigned Val)
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:407
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition StringRef.h:519
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
static Twine utohexstr(uint64_t Val)
Definition Twine.h:385
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
Represents a version number in the form major[.minor[.subminor[.build]]].
A node in a MsgPack Document.
MapDocNode & getMap(bool Convert=false)
Get a MapDocNode for a map node.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
DocNode & getRoot()
Get ref to the document's root element.
DocNode getNode()
Create a nil node associated with this Document.
LLVM_ABI bool readFromBlob(StringRef Blob, bool Multi, function_ref< int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)> Merger=[](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) { return -1;})
Read a document from a binary msgpack blob, merging into anything already in the Document.
A DocNode that is a map.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char NoteNameV2[]
const char NoteNameV3[]
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ NT_AMDGPU_METADATA
Definition ELF.h:1994
@ NT_AMD_PAL_METADATA
Definition ELF.h:1988
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
EnumStrings(const EnumStringsStorage< T, NumStrs, N, StrLen > &) -> EnumStrings< T, NumStrs >
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
#define N
Compile-time data representation of enum entries.
Definition Enum.h:47
Adapter to write values to a stream in a particular byte order.