LLVM 17.0.0git
SIMachineFunctionInfo.h
Go to the documentation of this file.
1//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15
18#include "AMDGPUTargetMachine.h"
20#include "SIInstrInfo.h"
22#include "llvm/ADT/SetVector.h"
26#include <optional>
27
28namespace llvm {
29
30class MachineFrameInfo;
31class MachineFunction;
32class SIMachineFunctionInfo;
33class SIRegisterInfo;
34class TargetRegisterClass;
35
37public:
38 enum AMDGPUPSVKind : unsigned {
41 };
42
43protected:
45 : PseudoSourceValue(Kind, TM) {}
46
47public:
  /// Report whether the memory behind this pseudo source value is constant.
  /// Always answers "not constant" for now; see the inline note below.
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }
53
  /// Conservatively report that this pseudo source may also be reachable
  /// through an LLVM IR value (prevents unsafe alias-analysis assumptions).
  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }
57
  /// Conservatively report that this pseudo source can alias IR-visible
  /// memory.
  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
61};
62
64public:
67
  /// LLVM-style RTTI: this PseudoSourceValue is a GWS resource iff its kind
  /// tag is GWSResource.
  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }
71
72 // These are inaccessible memory from IR.
  // GWS resources are inaccessible from IR, so they can never be pointed to
  // by an IR value.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }
76
77 // These are inaccessible memory from IR.
  // GWS resources are inaccessible from IR, so they can never alias an IR
  // value.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }
81
  /// Print a human-readable tag for this pseudo source in MIR/debug dumps.
  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
85};
86
87namespace yaml {
88
89struct SIArgument {
91 union {
93 unsigned StackOffset;
94 };
95 std::optional<unsigned> Mask;
96
97 // Default constructor, which creates a stack argument.
100 IsRegister = Other.IsRegister;
101 if (IsRegister) {
102 ::new ((void *)std::addressof(RegisterName))
103 StringValue(Other.RegisterName);
104 } else
105 StackOffset = Other.StackOffset;
106 Mask = Other.Mask;
107 }
109 IsRegister = Other.IsRegister;
110 if (IsRegister) {
111 ::new ((void *)std::addressof(RegisterName))
112 StringValue(Other.RegisterName);
113 } else
114 StackOffset = Other.StackOffset;
115 Mask = Other.Mask;
116 return *this;
117 }
119 if (IsRegister)
120 RegisterName.~StringValue();
121 }
122
123 // Helper to create a register or stack argument.
124 static inline SIArgument createArgument(bool IsReg) {
125 if (IsReg)
126 return SIArgument(IsReg);
127 return SIArgument();
128 }
129
130private:
131 // Construct a register argument.
133};
134
135template <> struct MappingTraits<SIArgument> {
136 static void mapping(IO &YamlIO, SIArgument &A) {
137 if (YamlIO.outputting()) {
138 if (A.IsRegister)
139 YamlIO.mapRequired("reg", A.RegisterName);
140 else
141 YamlIO.mapRequired("offset", A.StackOffset);
142 } else {
143 auto Keys = YamlIO.keys();
144 if (is_contained(Keys, "reg")) {
146 YamlIO.mapRequired("reg", A.RegisterName);
147 } else if (is_contained(Keys, "offset"))
148 YamlIO.mapRequired("offset", A.StackOffset);
149 else
150 YamlIO.setError("missing required key 'reg' or 'offset'");
151 }
152 YamlIO.mapOptional("mask", A.Mask);
153 }
154 static const bool flow = true;
155};
156
158 std::optional<SIArgument> PrivateSegmentBuffer;
159 std::optional<SIArgument> DispatchPtr;
160 std::optional<SIArgument> QueuePtr;
161 std::optional<SIArgument> KernargSegmentPtr;
162 std::optional<SIArgument> DispatchID;
163 std::optional<SIArgument> FlatScratchInit;
164 std::optional<SIArgument> PrivateSegmentSize;
165
166 std::optional<SIArgument> WorkGroupIDX;
167 std::optional<SIArgument> WorkGroupIDY;
168 std::optional<SIArgument> WorkGroupIDZ;
169 std::optional<SIArgument> WorkGroupInfo;
170 std::optional<SIArgument> LDSKernelId;
171 std::optional<SIArgument> PrivateSegmentWaveByteOffset;
172
173 std::optional<SIArgument> ImplicitArgPtr;
174 std::optional<SIArgument> ImplicitBufferPtr;
175
176 std::optional<SIArgument> WorkItemIDX;
177 std::optional<SIArgument> WorkItemIDY;
178 std::optional<SIArgument> WorkItemIDZ;
179};
180
181template <> struct MappingTraits<SIArgumentInfo> {
182 static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
183 YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
184 YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
185 YamlIO.mapOptional("queuePtr", AI.QueuePtr);
186 YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
187 YamlIO.mapOptional("dispatchID", AI.DispatchID);
188 YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
189 YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
190
191 YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
192 YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
193 YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
194 YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
195 YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
196 YamlIO.mapOptional("privateSegmentWaveByteOffset",
198
199 YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
200 YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
201
202 YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
203 YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
204 YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
205 }
206};
207
208// Default to default mode for default calling convention.
209struct SIMode {
210 bool IEEE = true;
211 bool DX10Clamp = true;
216
217 SIMode() = default;
218
220 IEEE = Mode.IEEE;
221 DX10Clamp = Mode.DX10Clamp;
222 FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
224 Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
226 Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
228 Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
229 }
230
231 bool operator ==(const SIMode Other) const {
232 return IEEE == Other.IEEE &&
233 DX10Clamp == Other.DX10Clamp &&
234 FP32InputDenormals == Other.FP32InputDenormals &&
235 FP32OutputDenormals == Other.FP32OutputDenormals &&
236 FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
237 FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
238 }
239};
240
/// YAML (de)serialization for SIMode. Every key is optional and defaults to
/// true, matching SIMode's default-constructed state, so a default mode
/// round-trips without emitting any keys.
template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
  }
};
251
258 bool IsEntryFunction = false;
260 bool MemoryBound = false;
261 bool WaveLimiter = false;
262 bool HasSpilledSGPRs = false;
263 bool HasSpilledVGPRs = false;
265
266 // TODO: 10 may be a better default since it's the maximum.
267 unsigned Occupancy = 0;
268
270
271 StringValue ScratchRSrcReg = "$private_rsrc_reg";
274
276 bool ReturnsVoid = true;
277
278 std::optional<SIArgumentInfo> ArgInfo;
280 std::optional<FrameIndex> ScavengeFI;
282
285 const TargetRegisterInfo &TRI,
286 const llvm::MachineFunction &MF);
287
288 void mappingImpl(yaml::IO &YamlIO) override;
290};
291
293 static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
294 YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
295 UINT64_C(0));
296 YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
297 YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
298 YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
299 YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
300 YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
301 YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
302 YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
303 YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
304 YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
305 YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
306 YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
307 StringValue("$private_rsrc_reg"));
308 YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
309 StringValue("$fp_reg"));
310 YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
311 StringValue("$sp_reg"));
312 YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
313 YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
314 YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
315 YamlIO.mapOptional("mode", MFI.Mode, SIMode());
316 YamlIO.mapOptional("highBitsOf32BitAddress",
317 MFI.HighBitsOf32BitAddress, 0u);
318 YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
319 YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
320 YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
321 YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
322 StringValue()); // Don't print out when it's empty.
323 }
324};
325
326} // end namespace yaml
327
328// A CSR SGPR value can be preserved inside a callee using one of the following
329// methods.
330// 1. Copy to an unused scratch SGPR.
331// 2. Spill to a VGPR lane.
332// 3. Spill to memory via. a scratch VGPR.
333// class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method used
334// for an SGPR at function prolog/epilog.
335enum class SGPRSaveKind : uint8_t {
339};
340
342 SGPRSaveKind Kind;
343 union {
344 int Index;
346 };
347
348public:
351 : Kind(K), Reg(R) {}
352 Register getReg() const { return Reg; }
353 int getIndex() const { return Index; }
354 SGPRSaveKind getKind() const { return Kind; }
355};
356
357/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
358/// tells the hardware which interpolation parameters to load.
360 friend class GCNTargetMachine;
361
362 // State of MODE register, assumed FP mode.
364
365 // Registers that may be reserved for spilling purposes. These may be the same
366 // as the input registers.
367 Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
368
369 // This is the unswizzled offset from the current dispatch's scratch wave
370 // base to the beginning of the current function's frame.
371 Register FrameOffsetReg = AMDGPU::FP_REG;
372
373 // This is an ABI register used in the non-entry calling convention to
374 // communicate the unswizzled offset from the current dispatch's scratch wave
375 // base to the beginning of the new function's frame.
376 Register StackPtrOffsetReg = AMDGPU::SP_REG;
377
379
380 // Graphics info.
381 unsigned PSInputAddr = 0;
382 unsigned PSInputEnable = 0;
383
384 /// Number of bytes of arguments this function has on the stack. If the callee
385 /// is expected to restore the argument stack this should be a multiple of 16,
386 /// all usable during a tail call.
387 ///
388 /// The alternative would forbid tail call optimisation in some cases: if we
389 /// want to transfer control from a function with 8-bytes of stack-argument
390 /// space to a function with 16-bytes then misalignment of this value would
391 /// make a stack adjustment necessary, which could not be undone by the
392 /// callee.
393 unsigned BytesInStackArgArea = 0;
394
395 bool ReturnsVoid = true;
396
397 // A pair of default/requested minimum/maximum flat work group sizes.
398 // Minimum - first, maximum - second.
399 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
400
401 // A pair of default/requested minimum/maximum number of waves per execution
402 // unit. Minimum - first, maximum - second.
403 std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
404
405 const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
406
407private:
408 unsigned NumUserSGPRs = 0;
409 unsigned NumSystemSGPRs = 0;
410
411 bool HasSpilledSGPRs = false;
412 bool HasSpilledVGPRs = false;
413 bool HasNonSpillStackObjects = false;
414 bool IsStackRealigned = false;
415
416 unsigned NumSpilledSGPRs = 0;
417 unsigned NumSpilledVGPRs = 0;
418
419 // Feature bits required for inputs passed in user SGPRs.
420 bool PrivateSegmentBuffer : 1;
421 bool DispatchPtr : 1;
422 bool QueuePtr : 1;
423 bool KernargSegmentPtr : 1;
424 bool DispatchID : 1;
425 bool FlatScratchInit : 1;
426
427 // Feature bits required for inputs passed in system SGPRs.
428 bool WorkGroupIDX : 1; // Always initialized.
429 bool WorkGroupIDY : 1;
430 bool WorkGroupIDZ : 1;
431 bool WorkGroupInfo : 1;
432 bool LDSKernelId : 1;
433 bool PrivateSegmentWaveByteOffset : 1;
434
435 bool WorkItemIDX : 1; // Always initialized.
436 bool WorkItemIDY : 1;
437 bool WorkItemIDZ : 1;
438
439 // Private memory buffer
440 // Compute directly in sgpr[0:1]
441 // Other shaders indirect 64-bits at sgpr[0:1]
442 bool ImplicitBufferPtr : 1;
443
444 // Pointer to where the ABI inserts special kernel arguments separate from the
445 // user arguments. This is an offset from the KernargSegmentPtr.
446 bool ImplicitArgPtr : 1;
447
448 bool MayNeedAGPRs : 1;
449
450 // The hard-wired high half of the address of the global information table
451 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
452 // current hardware only allows a 16 bit value.
453 unsigned GITPtrHigh;
454
455 unsigned HighBitsOf32BitAddress;
456
457 // Current recorded maximum possible occupancy.
458 unsigned Occupancy;
459
460 mutable std::optional<bool> UsesAGPRs;
461
462 MCPhysReg getNextUserSGPR() const;
463
464 MCPhysReg getNextSystemSGPR() const;
465
466public:
469 bool FullyAllocated = false;
470 bool IsDead = false;
471 };
472
473private:
474 // To track VGPR + lane index for each subregister of the SGPR spilled to
475 // frameindex key during SILowerSGPRSpills pass.
477 // To track VGPR + lane index for spilling special SGPRs like Frame Pointer
478 // identified during PrologEpilogInserter.
480 PrologEpilogSGPRSpillToVGPRLanes;
481 unsigned NumVGPRSpillLanes = 0;
482 unsigned NumVGPRPrologEpilogSpillLanes = 0;
483 SmallVector<Register, 2> SpillVGPRs;
485 // To track the registers used in instructions that can potentially modify the
486 // inactive lanes. The WWM instructions and the writelane instructions for
487 // spilling SGPRs to VGPRs fall under such category of operations. The VGPRs
488 // modified by them should be spilled/restored at function prolog/epilog to
489 // avoid any undesired outcome. Each entry in this map holds a pair of values,
490 // the VGPR and its stack slot index.
491 WWMSpillsMap WWMSpills;
492
494 // To track the VGPRs reserved for WWM instructions. They get stack slots
495 // later during PrologEpilogInserter and get added into the superset WWMSpills
496 // for actual spilling. A separate set makes the register reserved part and
497 // the serialization easier.
498 ReservedRegSet WWMReservedRegs;
499
502 // To track the SGPR spill method used for a CSR SGPR register during
503 // frame lowering. Even though the SGPR spills are handled during
504 // SILowerSGPRSpills pass, some special handling needed later during the
505 // PrologEpilogInserter.
506 PrologEpilogSGPRSpillsMap PrologEpilogSGPRSpills;
507
508 DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
509
510 // AGPRs used for VGPR spills.
512
513 // VGPRs used for AGPR spills.
515
516 // Emergency stack slot. Sometimes, we create this before finalizing the stack
517 // frame, so save it here and add it to the RegScavenger later.
518 std::optional<int> ScavengeFI;
519
520private:
521 Register VGPRForAGPRCopy;
522
523 bool allocateVGPRForSGPRSpills(MachineFunction &MF, int FI,
524 unsigned LaneIndex);
525 bool allocateVGPRForPrologEpilogSGPRSpills(MachineFunction &MF, int FI,
526 unsigned LaneIndex);
527
528public:
530 return VGPRForAGPRCopy;
531 }
532
  /// Record the VGPR reserved for lowering AGPR copies.
  void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
    VGPRForAGPRCopy = NewVGPRForAGPRCopy;
  }
536
537 bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const;
538
539public:
541 SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);
542
546 const override;
547
549 const MachineFunction &MF,
551 SMDiagnostic &Error, SMRange &SourceRange);
552
553 void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
554
555 SIModeRegisterDefaults getMode() const { return Mode; }
556
558 getSGPRSpillToVGPRLanes(int FrameIndex) const {
559 auto I = SGPRSpillToVGPRLanes.find(FrameIndex);
560 return (I == SGPRSpillToVGPRLanes.end())
562 : ArrayRef(I->second);
563 }
564
565 ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
566 const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
567 const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
568
570 return PrologEpilogSGPRSpills;
571 }
572
575 PrologEpilogSGPRSpills.insert(std::make_pair(Reg, SI));
576 }
577
578 // Check if an entry created for \p Reg in PrologEpilogSGPRSpills. Return true
579 // on success and false otherwise.
581 return PrologEpilogSGPRSpills.contains(Reg);
582 }
583
584 // Get the scratch SGPR if allocated to save/restore \p Reg.
586 auto I = PrologEpilogSGPRSpills.find(Reg);
587 if (I != PrologEpilogSGPRSpills.end() &&
588 I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
589 return I->second.getReg();
590
591 return AMDGPU::NoRegister;
592 }
593
594 // Get all scratch SGPRs allocated to copy/restore the SGPR spills.
596 for (const auto &SI : PrologEpilogSGPRSpills) {
597 if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
598 Regs.push_back(SI.second.getReg());
599 }
600 }
601
602 // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
604 return find_if(PrologEpilogSGPRSpills,
605 [FI](const std::pair<Register,
607 return SI.second.getKind() ==
609 SI.second.getIndex() == FI;
610 }) != PrologEpilogSGPRSpills.end();
611 }
612
615 auto I = PrologEpilogSGPRSpills.find(Reg);
616 assert(I != PrologEpilogSGPRSpills.end());
617
618 return I->second;
619 }
620
623 auto I = PrologEpilogSGPRSpillToVGPRLanes.find(FrameIndex);
624 return (I == PrologEpilogSGPRSpillToVGPRLanes.end())
626 : ArrayRef(I->second);
627 }
628
630 Align Alignment = Align(4));
631
633 MachineFunction &MF,
634 SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
635 SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;
636
638 return SpillAGPR;
639 }
640
642 return SpillVGPR;
643 }
644
645 MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
646 auto I = VGPRToAGPRSpills.find(FrameIndex);
647 return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
648 : I->second.Lanes[Lane];
649 }
650
651 void setVGPRToAGPRSpillDead(int FrameIndex) {
652 auto I = VGPRToAGPRSpills.find(FrameIndex);
653 if (I != VGPRToAGPRSpills.end())
654 I->second.IsDead = true;
655 }
656
658 bool IsPrologEpilog = false);
659 bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
660
661 /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
662 /// to the default stack.
664 bool ResetSGPRSpillStackIDs);
665
667 std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
668
  /// Number of bytes of stack-passed arguments this function owns (see the
  /// field's documentation for tail-call alignment requirements).
  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }
672
  /// Record the size, in bytes, of the stack argument area.
  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }
676
677 // Add user SGPRs.
686
687 /// Increment user SGPRs used for padding the argument list only.
689 Register Next = getNextUserSGPR();
690 ++NumUserSGPRs;
691 return Next;
692 }
693
694 // Add system SGPRs.
695 Register addWorkGroupIDX(bool HasArchitectedSGPRs) {
696 Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP9 : getNextSystemSGPR();
698 if (!HasArchitectedSGPRs)
699 NumSystemSGPRs += 1;
700
701 return ArgInfo.WorkGroupIDX.getRegister();
702 }
703
  /// Allocate the system SGPR carrying the Y workgroup ID.
  /// With architected SGPRs the value lives in TTMP9+... — here TTMP7 — and,
  /// when the Z ID is also in use, only the low 16 bits belong to Y (mask
  /// 0xffff; Z occupies the high half — see addWorkGroupIDZ). Otherwise a
  /// fresh system SGPR is consumed and the full register is used (mask ~0u).
  Register addWorkGroupIDY(bool HasArchitectedSGPRs) {
    Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP7 : getNextSystemSGPR();
    unsigned Mask = HasArchitectedSGPRs && hasWorkGroupIDZ() ? 0xffff : ~0u;
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg, Mask);
    // Architected SGPRs are pre-existing hardware registers; only a freshly
    // allocated SGPR counts against the system-SGPR budget.
    if (!HasArchitectedSGPRs)
      NumSystemSGPRs += 1;

    return ArgInfo.WorkGroupIDY.getRegister();
  }
713
  /// Allocate the system SGPR carrying the Z workgroup ID.
  /// With architected SGPRs the value is packed into the high 16 bits of
  /// TTMP7 (mask 0xffff << 16; the low half holds Y — see addWorkGroupIDY).
  /// Otherwise a fresh system SGPR is consumed and fully used (mask ~0u).
  Register addWorkGroupIDZ(bool HasArchitectedSGPRs) {
    Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP7 : getNextSystemSGPR();
    unsigned Mask = HasArchitectedSGPRs ? 0xffff << 16 : ~0u;
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg, Mask);
    // Only a freshly allocated SGPR counts against the system-SGPR budget.
    if (!HasArchitectedSGPRs)
      NumSystemSGPRs += 1;

    return ArgInfo.WorkGroupIDZ.getRegister();
  }
723
725 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
726 NumSystemSGPRs += 1;
727 return ArgInfo.WorkGroupInfo.getRegister();
728 }
729
730 // Add special VGPR inputs
732 ArgInfo.WorkItemIDX = Arg;
733 }
734
736 ArgInfo.WorkItemIDY = Arg;
737 }
738
740 ArgInfo.WorkItemIDZ = Arg;
741 }
742
744 ArgInfo.PrivateSegmentWaveByteOffset
745 = ArgDescriptor::createRegister(getNextSystemSGPR());
746 NumSystemSGPRs += 1;
747 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
748 }
749
751 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
752 }
753
755 return PrivateSegmentBuffer;
756 }
757
758 bool hasDispatchPtr() const {
759 return DispatchPtr;
760 }
761
762 bool hasQueuePtr() const {
763 return QueuePtr;
764 }
765
766 bool hasKernargSegmentPtr() const {
767 return KernargSegmentPtr;
768 }
769
770 bool hasDispatchID() const {
771 return DispatchID;
772 }
773
774 bool hasFlatScratchInit() const {
775 return FlatScratchInit;
776 }
777
778 bool hasWorkGroupIDX() const {
779 return WorkGroupIDX;
780 }
781
782 bool hasWorkGroupIDY() const {
783 return WorkGroupIDY;
784 }
785
786 bool hasWorkGroupIDZ() const {
787 return WorkGroupIDZ;
788 }
789
790 bool hasWorkGroupInfo() const {
791 return WorkGroupInfo;
792 }
793
794 bool hasLDSKernelId() const { return LDSKernelId; }
795
797 return PrivateSegmentWaveByteOffset;
798 }
799
800 bool hasWorkItemIDX() const {
801 return WorkItemIDX;
802 }
803
804 bool hasWorkItemIDY() const {
805 return WorkItemIDY;
806 }
807
808 bool hasWorkItemIDZ() const {
809 return WorkItemIDZ;
810 }
811
812 bool hasImplicitArgPtr() const {
813 return ImplicitArgPtr;
814 }
815
816 bool hasImplicitBufferPtr() const {
817 return ImplicitBufferPtr;
818 }
819
821 return ArgInfo;
822 }
823
825 return ArgInfo;
826 }
827
828 std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
830 return ArgInfo.getPreloadedValue(Value);
831 }
832
834 auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
835 return Arg ? Arg->getRegister() : MCRegister();
836 }
837
838 unsigned getGITPtrHigh() const {
839 return GITPtrHigh;
840 }
841
842 Register getGITPtrLoReg(const MachineFunction &MF) const;
843
845 return HighBitsOf32BitAddress;
846 }
847
848 unsigned getNumUserSGPRs() const {
849 return NumUserSGPRs;
850 }
851
852 unsigned getNumPreloadedSGPRs() const {
853 return NumUserSGPRs + NumSystemSGPRs;
854 }
855
857 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
858 }
859
860 /// Returns the physical register reserved for use as the resource
861 /// descriptor for scratch accesses.
863 return ScratchRSrcReg;
864 }
865
867 assert(Reg != 0 && "Should never be unset");
868 ScratchRSrcReg = Reg;
869 }
870
872 return FrameOffsetReg;
873 }
874
876 assert(Reg != 0 && "Should never be unset");
877 FrameOffsetReg = Reg;
878 }
879
881 assert(Reg != 0 && "Should never be unset");
882 StackPtrOffsetReg = Reg;
883 }
884
885 // Note the unset value for this is AMDGPU::SP_REG rather than
886 // NoRegister. This is mostly a workaround for MIR tests where state that
887 // can't be directly computed from the function is not preserved in serialized
888 // MIR.
890 return StackPtrOffsetReg;
891 }
892
894 return ArgInfo.QueuePtr.getRegister();
895 }
896
898 return ArgInfo.ImplicitBufferPtr.getRegister();
899 }
900
901 bool hasSpilledSGPRs() const {
902 return HasSpilledSGPRs;
903 }
904
905 void setHasSpilledSGPRs(bool Spill = true) {
906 HasSpilledSGPRs = Spill;
907 }
908
909 bool hasSpilledVGPRs() const {
910 return HasSpilledVGPRs;
911 }
912
913 void setHasSpilledVGPRs(bool Spill = true) {
914 HasSpilledVGPRs = Spill;
915 }
916
918 return HasNonSpillStackObjects;
919 }
920
921 void setHasNonSpillStackObjects(bool StackObject = true) {
922 HasNonSpillStackObjects = StackObject;
923 }
924
925 bool isStackRealigned() const {
926 return IsStackRealigned;
927 }
928
929 void setIsStackRealigned(bool Realigned = true) {
930 IsStackRealigned = Realigned;
931 }
932
933 unsigned getNumSpilledSGPRs() const {
934 return NumSpilledSGPRs;
935 }
936
937 unsigned getNumSpilledVGPRs() const {
938 return NumSpilledVGPRs;
939 }
940
941 void addToSpilledSGPRs(unsigned num) {
942 NumSpilledSGPRs += num;
943 }
944
945 void addToSpilledVGPRs(unsigned num) {
946 NumSpilledVGPRs += num;
947 }
948
949 unsigned getPSInputAddr() const {
950 return PSInputAddr;
951 }
952
953 unsigned getPSInputEnable() const {
954 return PSInputEnable;
955 }
956
957 bool isPSInputAllocated(unsigned Index) const {
958 return PSInputAddr & (1 << Index);
959 }
960
962 PSInputAddr |= 1 << Index;
963 }
964
965 void markPSInputEnabled(unsigned Index) {
966 PSInputEnable |= 1 << Index;
967 }
968
969 bool returnsVoid() const {
970 return ReturnsVoid;
971 }
972
974 ReturnsVoid = Value;
975 }
976
977 /// \returns A pair of default/requested minimum/maximum flat work group sizes
978 /// for this function.
979 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
980 return FlatWorkGroupSizes;
981 }
982
983 /// \returns Default/requested minimum flat work group size for this function.
984 unsigned getMinFlatWorkGroupSize() const {
985 return FlatWorkGroupSizes.first;
986 }
987
988 /// \returns Default/requested maximum flat work group size for this function.
989 unsigned getMaxFlatWorkGroupSize() const {
990 return FlatWorkGroupSizes.second;
991 }
992
993 /// \returns A pair of default/requested minimum/maximum number of waves per
994 /// execution unit.
995 std::pair<unsigned, unsigned> getWavesPerEU() const {
996 return WavesPerEU;
997 }
998
999 /// \returns Default/requested minimum number of waves per execution unit.
1000 unsigned getMinWavesPerEU() const {
1001 return WavesPerEU.first;
1002 }
1003
1004 /// \returns Default/requested maximum number of waves per execution unit.
1005 unsigned getMaxWavesPerEU() const {
1006 return WavesPerEU.second;
1007 }
1008
1009 /// \returns SGPR used for \p Dim's work group ID.
1010 Register getWorkGroupIDSGPR(unsigned Dim) const {
1011 switch (Dim) {
1012 case 0:
1014 return ArgInfo.WorkGroupIDX.getRegister();
1015 case 1:
1017 return ArgInfo.WorkGroupIDY.getRegister();
1018 case 2:
1020 return ArgInfo.WorkGroupIDZ.getRegister();
1021 }
1022 llvm_unreachable("unexpected dimension");
1023 }
1024
1027 return &GWSResourcePSV;
1028 }
1029
1030 unsigned getOccupancy() const {
1031 return Occupancy;
1032 }
1033
1034 unsigned getMinAllowedOccupancy() const {
1035 if (!isMemoryBound() && !needsWaveLimiter())
1036 return Occupancy;
1037 return (Occupancy < 4) ? Occupancy : 4;
1038 }
1039
1040 void limitOccupancy(const MachineFunction &MF);
1041
1042 void limitOccupancy(unsigned Limit) {
1043 if (Occupancy > Limit)
1044 Occupancy = Limit;
1045 }
1046
  /// Raise the recorded occupancy up to \p Limit, then re-clamp it via
  /// limitOccupancy(MF) so the result never exceeds what the function can
  /// actually sustain. The order matters: raise first, then clamp.
  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
1052
1053 bool mayNeedAGPRs() const {
1054 return MayNeedAGPRs;
1055 }
1056
1057 // \returns true if a function has a use of AGPRs via inline asm or
1058 // has a call which may use it.
1059 bool mayUseAGPRs(const Function &F) const;
1060
1061 // \returns true if a function needs or may need AGPRs.
1062 bool usesAGPRs(const MachineFunction &MF) const;
1063};
1064
1065} // end namespace llvm
1066
1067#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Provides AMDGPU specific target descriptions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
basic Basic Alias true
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
uint64_t Size
IO & YamlIO
Definition: ELFYAML.cpp:1259
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
unsigned Reg
const char LLVMTargetMachineRef TM
Basic Register Allocator
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
@ SI
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
std::array< StringRef, 64 > Keys
Definition: TextStubV5.cpp:123
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
static bool classof(const PseudoSourceValue *V)
AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
bool isConstant(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue has a constant value.
AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I)
Special value supplied for machine level alias analysis.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const WWMSpillsMap & getWWMSpills() const
bool usesAGPRs(const MachineFunction &MF) const
bool isPSInputAllocated(unsigned Index) const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
void setWorkItemIDY(ArgDescriptor Arg)
const PrologEpilogSGPRSpillsMap & getPrologEpilogSGPRSpills() const
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
void setWorkItemIDZ(ArgDescriptor Arg)
std::pair< unsigned, unsigned > getWavesPerEU() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register addDispatchPtr(const SIRegisterInfo &TRI)
void setVGPRToAGPRSpillDead(int FrameIndex)
std::pair< unsigned, unsigned > getFlatWorkGroupSizes() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
void setStackPtrOffsetReg(Register Reg)
Register addReservedUserSGPR()
Increment user SGPRs used for padding the argument list only.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Register addQueuePtr(const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool IsPrologEpilog=false)
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
void setBytesInStackArgArea(unsigned Bytes)
SIModeRegisterDefaults getMode() const
Register addWorkGroupIDX(bool HasArchitectedSGPRs)
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool mayUseAGPRs(const Function &F) const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
Register addWorkGroupIDY(bool HasArchitectedSGPRs)
void setPrivateSegmentWaveByteOffset(Register Reg)
const AMDGPUFunctionArgInfo & getArgInfo() const
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Register addDispatchID(const SIRegisterInfo &TRI)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void markPSInputAllocated(unsigned Index)
void setWorkItemIDX(ArgDescriptor Arg)
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
void markPSInputEnabled(unsigned Index)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
void setHasSpilledSGPRs(bool Spill=true)
Register getWorkGroupIDSGPR(unsigned Dim) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVGPRLanes(int FrameIndex) const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
Register addWorkGroupIDZ(bool HasArchitectedSGPRs)
const ReservedRegSet & getWWMReservedRegs() const
Register getImplicitBufferPtrUserSGPR() const
std::optional< int > getOptionalScavengeFI() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
AMDGPUFunctionArgInfo & getArgInfo()
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setHasNonSpillStackObjects(bool StackObject=true)
void setIsStackRealigned(bool Realigned=true)
void limitOccupancy(const MachineFunction &MF)
ArrayRef< Register > getSGPRSpillVGPRs() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const
const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const AMDGPUTargetMachine &TM)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:281
Represents a range in source code.
Definition: SMLoc.h:48
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:36
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1846
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:51
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
static void mapping(IO &YamlIO, SIArgumentInfo &AI)
static void mapping(IO &YamlIO, SIArgument &A)
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI)
static void mapping(IO &YamlIO, SIMode &Mode)
std::optional< SIArgument > PrivateSegmentWaveByteOffset
std::optional< SIArgument > WorkGroupIDY
std::optional< SIArgument > FlatScratchInit
std::optional< SIArgument > DispatchPtr
std::optional< SIArgument > DispatchID
std::optional< SIArgument > WorkItemIDY
std::optional< SIArgument > WorkGroupIDX
std::optional< SIArgument > ImplicitArgPtr
std::optional< SIArgument > QueuePtr
std::optional< SIArgument > WorkGroupInfo
std::optional< SIArgument > LDSKernelId
std::optional< SIArgument > ImplicitBufferPtr
std::optional< SIArgument > WorkItemIDX
std::optional< SIArgument > KernargSegmentPtr
std::optional< SIArgument > WorkItemIDZ
std::optional< SIArgument > PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentBuffer
std::optional< SIArgument > WorkGroupIDZ
std::optional< unsigned > Mask
SIArgument(const SIArgument &Other)
SIArgument & operator=(const SIArgument &Other)
static SIArgument createArgument(bool IsReg)
SmallVector< StringValue > WWMReservedRegs
void mappingImpl(yaml::IO &YamlIO) override
std::optional< SIArgumentInfo > ArgInfo
std::optional< FrameIndex > ScavengeFI
SIMode(const SIModeRegisterDefaults &Mode)
bool operator==(const SIMode Other) const
A wrapper around std::string which contains a source range that's being set during parsing.