//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

#define MAX_LANES 64
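// A wavefront has at most 64 lanes, so a single 32-bit VGPR provides up to
// MAX_LANES 4-byte slots (one lane each) for spilled SGPRs.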

using namespace llvm;

const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI),
      Mode(F),
      GWSResourcePSV(getTM(STI)),
      PrivateSegmentBuffer(false),
      DispatchPtr(false),
      QueuePtr(false),
      KernargSegmentPtr(false),
      DispatchID(false),
      FlatScratchInit(false),
      WorkGroupIDX(false),
      WorkGroupIDY(false),
      WorkGroupIDZ(false),
      WorkGroupInfo(false),
      LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false),
      WorkItemIDX(false),
      WorkItemIDY(false),
      WorkItemIDZ(false),
      ImplicitBufferPtr(false),
      ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff),
      HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC) ||
      (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}
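// Illustrative example (not from the source): with the full 256-VGPR budget,
// the AGPR-copy scratch register reserved above would be VGPR255, i.e.
// VGPR_32RegClass.getRegister(256 - 1).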

MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}
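// Illustrative sketch (not part of the source): user SGPRs are handed out
// contiguously from SGPR0 via getNextUserSGPR(), so on a fresh function:
//   addPrivateSegmentBuffer(TRI); // -> SGPR0_SGPR1_SGPR2_SGPR3, NumUserSGPRs = 4
//   addDispatchPtr(TRI);          // -> SGPR4_SGPR5,             NumUserSGPRs = 6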

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(
    TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0,
                            &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(
    TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0,
                            &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF,
                                                      int FI,
                                                      unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillToVGPRLanes.erase(FI);
      return false;
    }

    SpillVGPRs.push_back(LaneVGPR);
    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineBasicBlock &BB : MF)
      BB.addLiveIn(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillToVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      PrologEpilogSGPRSpillToVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
  } else {
    LaneVGPR = WWMSpills.back().first;
  }

  PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
                                                        int FI,
                                                        bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]
                     : SGPRSpillToVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes =
      IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated =
        IsPrologEpilog
            ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)
            : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}
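// Worked example (illustrative only): a 16-byte frame object, e.g. a spilled
// SGPR_128, needs Size / 4 = 4 lanes. With wave64, the first 64 spill lanes
// share one VGPR; the 65th lane wraps (NumSpillLanes % WaveSize == 0) and
// triggers allocation of a fresh VGPR in the helpers above.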

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
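// Note that the loop above fills lanes from the highest index downward while
// consuming candidate registers from the low end of the class, so a partial
// allocation leaves the lowest-numbered lanes at AMDGPU::NoRegister.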

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, and also remove the
  // frame indices from the `SGPRSpillToVGPRLanes` data structure; otherwise
  // stale entries could cause bugs if later passes (such as "stack slot
  // coloring") re-map the freed frame indices.
  for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillToVGPRLanes.erase(R.first);
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", std::nullopt, std::nullopt);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}

bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}
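// For example (illustrative, not from the source), inline assembly using an
// AGPR constraint, such as
//   asm volatile("" :: "a"(V));
// makes the scan above return true, as do indirect calls and calls to
// non-intrinsic functions, whose register usage is unknown here.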

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}