SIMachineFunctionInfo.h
//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVBuffer = PseudoSourceValue::TargetCustom,
    PSVImage,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
      : PseudoSourceValue(Kind, TII) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == PSVBuffer;
  }
};

class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(PSVImage, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == PSVImage;
  }
};

class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(GWSResource, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // These are inaccessible memory from IR.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // These are inaccessible memory from IR.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};
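
// These pseudo source values are created lazily by SIMachineFunctionInfo
// (getBufferPSV/getImagePSV/getGWSPSV below) and attached to MachineMemOperands,
// giving machine-level alias analysis something to reason about for buffer,
// image and GWS accesses that have no underlying IR Value.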

namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  Optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};
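
// Illustrative note: because this is a flow mapping, an SIArgument round-trips
// through MIR YAML as an inline mapping with exactly one of the 'reg'/'offset'
// keys plus an optional 'mask', e.g. '{ reg: '$sgpr10' }' or
// '{ offset: 16, mask: 65535 }' (the concrete values here are made up).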

struct SIArgumentInfo {
  Optional<SIArgument> PrivateSegmentBuffer;
  Optional<SIArgument> DispatchPtr;
  Optional<SIArgument> QueuePtr;
  Optional<SIArgument> KernargSegmentPtr;
  Optional<SIArgument> DispatchID;
  Optional<SIArgument> FlatScratchInit;
  Optional<SIArgument> PrivateSegmentSize;

  Optional<SIArgument> WorkGroupIDX;
  Optional<SIArgument> WorkGroupIDY;
  Optional<SIArgument> WorkGroupIDZ;
  Optional<SIArgument> WorkGroupInfo;
  Optional<SIArgument> PrivateSegmentWaveByteOffset;

  Optional<SIArgument> ImplicitArgPtr;
  Optional<SIArgument> ImplicitBufferPtr;

  Optional<SIArgument> WorkItemIDX;
  Optional<SIArgument> WorkItemIDY;
  Optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};

// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;

  SIMode() = default;

  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
  }

  bool operator ==(const SIMode Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
  }
};

struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  uint32_t HighBitsOf32BitAddress = 0;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  Optional<SIArgumentInfo> ArgInfo;
  SIMode Mode;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
                       StringValue("$scratch_wave_offset_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
  }
};
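
// Sketch of the serialized form these traits produce under the
// 'machineFunctionInfo:' block of a MIR file (all values below are
// illustrative, not taken from a real test):
//
//   machineFunctionInfo:
//     explicitKernArgSize: 8
//     maxKernArgAlign: 8
//     isEntryFunction: true
//     scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
//     stackPtrOffsetReg: '$sgpr32'
//     argumentInfo:
//       privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
//       kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
//     mode:
//       ieee: true
//       dx10-clamp: true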

} // end namespace yaml

/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // State of MODE register, assumed FP mode.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;
  unsigned GDSSize;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  struct SpilledReg {
    unsigned VGPR = 0;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}

    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != 0;}
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
  };

  SparseBitVector<> WWMReservedRegs;

  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;
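
  // Summary of the spilling scheme above: each spilled SGPR lane is assigned a
  // (VGPR, lane) pair recorded in SGPRToVGPRSpills, keyed by the spill's frame
  // index; on subtargets with AGPRs, VGPR spills can likewise be parked in AGPR
  // lanes (and AGPR spills in VGPRs) via VGPRToAGPRSpills instead of going to
  // scratch memory.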

public: // FIXME
  /// If this is set, an SGPR used for save/restore of the register used for the
  /// frame pointer.
  unsigned SGPRForFPSaveRestoreCopy = 0;
  Optional<int> FramePointerSaveIndex;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);

  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  AMDGPU::SIModeRegisterDefaults getMode() const {
    return Mode;
  }

  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                 unsigned NumLane) const;
  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
  void removeDeadFrameIndices(MachineFrameInfo &MFI);

  bool hasCalculatedTID() const { return TIDReg != 0; };
  unsigned getTIDReg() const { return TIDReg; };
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    auto Arg = ArgInfo.getPreloadedValue(Value).first;
    return Arg ? Arg->getRegister() : Register();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getGDSSize() const {
    return GDSSize;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
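
  // Illustrative note: Index selects a single bit of the PSInputAddr /
  // PSInputEnable masks, so markPSInputAllocated(0) followed by
  // markPSInputEnabled(0) requests and enables the first pixel-shader
  // interpolation input.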

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
    if (!GWSResourcePSV) {
      GWSResourcePSV =
          std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
    }

    return GWSResourcePSV.get();
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};
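
// Illustrative usage (the surrounding code is assumed, not part of this
// header): passes typically obtain this object from their MachineFunction:
//   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
//   unsigned SP = MFI->getStackPtrOffsetReg();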

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H