LLVM 20.0.0git
SIMachineFunctionInfo.h
//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVImage = PseudoSourceValue::TargetCustom,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
      : PseudoSourceValue(Kind, TM) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
      : AMDGPUPseudoSourceValue(GWSResource, TM) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // These are inaccessible memory from IR.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // These are inaccessible memory from IR.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};

namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  std::optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister)
      new (&RegisterName) StringValue(Other.RegisterName);
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    // Default-construct or destruct the old RegisterName in case of switching
    // union members
    if (IsRegister != Other.IsRegister) {
      if (Other.IsRegister)
        new (&RegisterName) StringValue();
      else
        RegisterName.~StringValue();
    }
    IsRegister = Other.IsRegister;
    if (IsRegister)
      RegisterName = Other.RegisterName;
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};

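// Illustrative examples of the flow mapping these traits produce in MIR (the
// particular register names and values are made up, not taken from a real
// dump): an argument held in a register serializes as "{ reg: '$sgpr12' }", a
// stack-relative argument as "{ offset: 16 }", and the optional lane mask may
// be added as "{ reg: '$vgpr31', mask: 1023 }".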
struct SIArgumentInfo {
  std::optional<SIArgument> PrivateSegmentBuffer;
  std::optional<SIArgument> DispatchPtr;
  std::optional<SIArgument> QueuePtr;
  std::optional<SIArgument> KernargSegmentPtr;
  std::optional<SIArgument> DispatchID;
  std::optional<SIArgument> FlatScratchInit;
  std::optional<SIArgument> PrivateSegmentSize;

  std::optional<SIArgument> WorkGroupIDX;
  std::optional<SIArgument> WorkGroupIDY;
  std::optional<SIArgument> WorkGroupIDZ;
  std::optional<SIArgument> WorkGroupInfo;
  std::optional<SIArgument> LDSKernelId;
  std::optional<SIArgument> PrivateSegmentWaveByteOffset;

  std::optional<SIArgument> ImplicitArgPtr;
  std::optional<SIArgument> ImplicitBufferPtr;

  std::optional<SIArgument> WorkItemIDX;
  std::optional<SIArgument> WorkItemIDY;
  std::optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};

// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;
  bool FP32InputDenormals = true;
  bool FP32OutputDenormals = true;
  bool FP64FP16InputDenormals = true;
  bool FP64FP16OutputDenormals = true;

  SIMode() = default;

  SIMode(const SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
    FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
    FP32OutputDenormals =
        Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
    FP64FP16InputDenormals =
        Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
    FP64FP16OutputDenormals =
        Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
  }

  bool operator ==(const SIMode Other) const {
    return IEEE == Other.IEEE &&
           DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
  }
};

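// A hand-written sketch of how a non-default mode block can look in MIR (the
// particular values are made up for illustration):
//   mode:
//     ieee: false
//     dx10-clamp: true
//     fp32-input-denormals: false
//     fp32-output-denormals: false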
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  Align MaxKernArgAlign;
  uint32_t LDSSize = 0;
  uint32_t GDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool IsChainFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  SmallVector<StringValue, 2> SpillPhysVGPRS;
  SmallVector<StringValue> WWMReservedRegs;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  unsigned BytesInStackArgArea = 0;
  bool ReturnsVoid = true;

  std::optional<SIArgumentInfo> ArgInfo;

  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;
  unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;

  SIMode Mode;
  std::optional<FrameIndex> ScavengeFI;
  StringValue VGPRForAGPRCopy;
  StringValue SGPRForEXECCopy;
  StringValue LongBranchReservedReg;

  bool HasInitWholeWave = false;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI,
                        const llvm::MachineFunction &MF);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
    YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("isChainFunction", MFI.IsChainFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
    YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u);
    YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u);
    YamlIO.mapOptional("maxMemoryClusterDWords", MFI.MaxMemoryClusterDWords,
                       DefaultMemoryClusterDWordsLimit);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
    YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    YamlIO.mapOptional("spillPhysVGPRs", MFI.SpillPhysVGPRS);
    YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
    YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
    YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("sgprForEXECCopy", MFI.SGPRForEXECCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg,
                       StringValue());
    YamlIO.mapOptional("hasInitWholeWave", MFI.HasInitWholeWave, false);
  }
};

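// Sketch of the corresponding machineFunctionInfo block in a MIR file; the
// register names and numbers below are illustrative rather than quoted from
// an actual test:
//   machineFunctionInfo:
//     isEntryFunction: true
//     scratchRSrcReg:    '$sgpr0_sgpr1_sgpr2_sgpr3'
//     stackPtrOffsetReg: '$sgpr32'
//     occupancy:         8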
} // end namespace yaml

// A CSR SGPR value can be preserved inside a callee using one of the following
// methods.
// 1. Copy to an unused scratch SGPR.
// 2. Spill to a VGPR lane.
// 3. Spill to memory via a scratch VGPR.
// class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method used
// for an SGPR at function prolog/epilog.
enum class SGPRSaveKind : uint8_t {
  COPY_TO_SCRATCH_SGPR,
  SPILL_TO_VGPR_LANE,
  SPILL_TO_MEM,
};

class PrologEpilogSGPRSaveRestoreInfo {
  SGPRSaveKind Kind;
  union {
    int Index;
    Register Reg;
  };

public:
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
      : Kind(K), Reg(R) {}
  Register getReg() const { return Reg; }
  int getIndex() const { return Index; }
  SGPRSaveKind getKind() const { return Kind; }
};

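// Illustrative use (hypothetical register choices, not a quote from the
// in-tree frame lowering): given an SIMachineFunctionInfo &MFI, recording that
// a callee-saved SGPR is preserved by copying it into a free scratch SGPR:
//   MFI.addToPrologEpilogSGPRSpills(
//       AMDGPU::SGPR40,
//       PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::COPY_TO_SCRATCH_SGPR,
//                                       AMDGPU::SGPR4));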
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
                                    private MachineRegisterInfo::Delegate {
  friend class GCNTargetMachine;

  // State of MODE register, assumed FP mode.
  SIModeRegisterDefaults Mode;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  // Registers that may be reserved when RA doesn't allocate enough
  // registers to plan for the case where an indirect branch ends up
  // being needed during branch relaxation.
  Register LongBranchReservedReg;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;

  // Default/requested number of work groups for the function.
  SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};

private:
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Tracks information about user SGPRs that will be set up by hardware which
  // will apply to all wavefronts of the grid.
  GCNUserSGPRUsageInfo UserSGPRInfo;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool LDSKernelId : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  bool MayNeedAGPRs : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Flags associated with the virtual registers.
  IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  // Maximum number of dwords that can be clustered during instruction
  // scheduler stage.
  unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;

  mutable std::optional<bool> UsesAGPRs;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

  // MachineRegisterInfo callback functions to notify events.
  void MRI_NoteNewVirtualRegister(Register Reg) override;
  void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;

public:
  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
    bool IsDead = false;
  };

private:
  // To track virtual VGPR + lane index for each subregister of the SGPR spilled
  // to frameindex key during SILowerSGPRSpills pass.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToVirtualVGPRLanes;
  // To track physical VGPR + lane index for CSR SGPR spills and special SGPRs
  // like Frame Pointer identified during PrologEpilogInserter.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToPhysicalVGPRLanes;
  unsigned NumVirtualVGPRSpillLanes = 0;
  unsigned NumPhysicalVGPRSpillLanes = 0;
  SmallVector<Register, 2> SpillVGPRs;
  SmallVector<Register, 2> SpillPhysVGPRs;
  using WWMSpillsMap = MapVector<Register, int>;
  // To track the registers used in instructions that can potentially modify the
  // inactive lanes. The WWM instructions and the writelane instructions for
  // spilling SGPRs to VGPRs fall under such category of operations. The VGPRs
  // modified by them should be spilled/restored at function prolog/epilog to
  // avoid any undesired outcome. Each entry in this map holds a pair of values,
  // the VGPR and its stack slot index.
  WWMSpillsMap WWMSpills;

  // Before allocation, the VGPR registers are partitioned into two distinct
  // sets, the first one for WWM-values and the second set for non-WWM values.
  // The latter set should be reserved during WWM-regalloc.
  BitVector NonWWMRegMask;

  using ReservedRegSet = SetVector<Register>;
  // To track the VGPRs reserved for WWM instructions. They get stack slots
  // later during PrologEpilogInserter and get added into the superset WWMSpills
  // for actual spilling. A separate set makes the register reserved part and
  // the serialization easier.
  ReservedRegSet WWMReservedRegs;

  using PrologEpilogSGPRSpill =
      std::pair<Register, PrologEpilogSGPRSaveRestoreInfo>;
  // To track the SGPR spill method used for a CSR SGPR register during
  // frame lowering. Even though the SGPR spills are handled during
  // SILowerSGPRSpills pass, some special handling is needed later during the
  // PrologEpilogInserter.
  SmallVector<PrologEpilogSGPRSpill, 3> PrologEpilogSGPRSpills;

  // To save/restore EXEC MASK around WWM spills and copies.
  Register SGPRForEXECCopy;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

  // Emergency stack slot. Sometimes, we create this before finalizing the stack
  // frame, so save it here and add it to the RegScavenger later.
  std::optional<int> ScavengeFI;

private:
  Register VGPRForAGPRCopy;

  bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                        unsigned LaneIndex);
  bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                         unsigned LaneIndex,
                                         bool IsPrologEpilog);

public:
  Register getVGPRForAGPRCopy() const {
    return VGPRForAGPRCopy;
  }

  void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
    VGPRForAGPRCopy = NewVGPRForAGPRCopy;
  }

  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const;

public:
  SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
  SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);

  MachineFunctionInfo *
  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
      const override;

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
                                const MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                SMDiagnostic &Error, SMRange &SourceRange);

  void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
  bool isWWMReg(Register Reg) const {
    return Reg.isVirtual() ? checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)
                           : WWMReservedRegs.contains(Reg);
  }

  void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
  BitVector getNonWWMRegMask() const { return NonWWMRegMask; }
  void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); }

  SIModeRegisterDefaults getMode() const { return Mode; }

616
618 getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
619 auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex);
620 return (I == SGPRSpillsToVirtualVGPRLanes.end())
622 : ArrayRef(I->second);
623 }
624
625 ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
626 ArrayRef<Register> getSGPRSpillPhysVGPRs() const { return SpillPhysVGPRs; }
627
628 const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
629 const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
630
632 assert(is_sorted(PrologEpilogSGPRSpills, llvm::less_first()));
633 return PrologEpilogSGPRSpills;
634 }
635
636 GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }
637
638 const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }
639
643
    // Insert a new entry in the right place to keep the vector in sorted order.
    // This should be cheap since the vector is expected to be very short.
    PrologEpilogSGPRSpills.insert(
        upper_bound(
            PrologEpilogSGPRSpills, Reg,
            [](const auto &LHS, const auto &RHS) { return LHS < RHS.first; }),
        std::make_pair(Reg, SI));
  }

  // Check if an entry was created for \p Reg in PrologEpilogSGPRSpills. Return
  // true on success and false otherwise.
  bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    return I != PrologEpilogSGPRSpills.end();
  }

  // Get the scratch SGPR if allocated to save/restore \p Reg.
  Register getScratchSGPRCopyDstReg(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    if (I != PrologEpilogSGPRSpills.end() &&
        I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
      return I->second.getReg();

    return AMDGPU::NoRegister;
  }

  // Get all scratch SGPRs allocated to copy/restore the SGPR spills.
  void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
    for (const auto &SI : PrologEpilogSGPRSpills) {
      if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
        Regs.push_back(SI.second.getReg());
    }
  }

  // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
  bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
    return find_if(PrologEpilogSGPRSpills,
                   [FI](const std::pair<Register,
                                        PrologEpilogSGPRSaveRestoreInfo> &SI) {
                     return SI.second.getKind() ==
                                SGPRSaveKind::SPILL_TO_VGPR_LANE &&
                            SI.second.getIndex() == FI;
                   }) != PrologEpilogSGPRSpills.end();
  }

  const PrologEpilogSGPRSaveRestoreInfo &
  getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    assert(I != PrologEpilogSGPRSpills.end());

    return I->second;
  }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToPhysicalVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  void setFlag(Register Reg, uint8_t Flag) {
    assert(Reg.isVirtual());
    if (VRegFlags.inBounds(Reg))
      VRegFlags[Reg] |= Flag;
  }

  bool checkFlag(Register Reg, uint8_t Flag) const {
    if (Reg.isPhysical())
      return false;

    return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag;
  }

  bool hasVRegFlags() { return VRegFlags.size(); }

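  // Illustrative use of the flag interface (the virtual register VReg here is
  // hypothetical): a pass can tag a register as a WWM value and query it later:
  //   MFI.setFlag(VReg, AMDGPU::VirtRegFlag::WWM_REG);
  //   if (MFI.checkFlag(VReg, AMDGPU::VirtRegFlag::WWM_REG)) { ... }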
  void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
                        Align Alignment = Align(4));

  void splitWWMSpillRegisters(
      MachineFunction &MF,
      SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
      SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillAGPR;
  }

  Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }

  void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  void setVGPRToAGPRSpillDead(int FrameIndex) {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    if (I != VGPRToAGPRSpills.end())
      I->second.IsDead = true;
  }

  // To bring the allocated WWM registers in \p WWMVGPRs to the lowest available
  // range.
  void shiftWwmVGPRsToLowestRange(MachineFunction &MF,
                                  SmallVectorImpl<Register> &WWMVGPRs,
                                  BitVector &SavedVGPRs);

  bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
                                   bool SpillToPhysVGPRLane = false,
                                   bool IsPrologEpilog = false);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);

  /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
  /// to the default stack.
  bool removeDeadFrameIndices(MachineFrameInfo &MFI,
                              bool ResetSGPRSpillStackIDs);

  int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
  std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
  Register addLDSKernelId();
  SmallVectorImpl<MCRegister> *
  addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
                      unsigned AllocSizeDWord, int KernArgIdx,
                      int PaddingSGPRs);

  /// Increment user SGPRs used for padding the argument list only.
  Register addReservedUserSGPR() {
    Register Next = getNextUserSGPR();
    ++NumUserSGPRs;
    return Next;
  }

  // Add system SGPRs.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  bool hasLDSKernelId() const { return LDSKernelId; }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    const auto *Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getNumKernargPreloadedSGPRs() const {
    return UserSGPRInfo.getNumKernargPreloadSGPRs();
  }

  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getLongBranchReservedReg() const { return LongBranchReservedReg; }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  const AMDGPUGWSResourcePseudoSourceValue *
  getGWSPSV(const AMDGPUTargetMachine &TM) {
    return &GWSResourcePSV;
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }

  unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }

  bool mayNeedAGPRs() const {
    return MayNeedAGPRs;
  }

  // \returns true if a function has a use of AGPRs via inline asm or
  // has a call which may use it.
  bool mayUseAGPRs(const Function &F) const;

  // \returns true if a function needs or may need AGPRs.
  bool usesAGPRs(const MachineFunction &MF) const;

  /// \returns Default/requested number of work groups for this function.
  SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }

  unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
  unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
  unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H