//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVBuffer = PseudoSourceValue::TargetCustom,
    PSVImage,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
      : PseudoSourceValue(Kind, TII) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == PSVBuffer;
  }
};

class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(PSVImage, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == PSVImage;
  }
};

class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(GWSResource, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // These are inaccessible memory from IR.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // These are inaccessible memory from IR.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};
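
// For illustration: once one of these pseudo source values is attached to a
// memory operand, passes distinguish the memory kinds with LLVM's usual RTTI,
// e.g. isa<AMDGPUGWSResourcePseudoSourceValue>(PSV) dispatches through the
// classof() overloads above; a GWS access then reports that it never aliases
// IR-visible memory (see its isAliased/mayAlias overrides).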

namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  Optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};
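
// For illustration, an SIArgument round-trips through MIR YAML as a flow
// mapping with either a register or a stack offset; the concrete values
// below are made-up examples, not defaults from this file:
//   { reg: '$sgpr4_sgpr5' }
//   { reg: '$vgpr0', mask: 3 }
//   { offset: 16 }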

struct SIArgumentInfo {
  Optional<SIArgument> PrivateSegmentBuffer;
  Optional<SIArgument> DispatchPtr;
  Optional<SIArgument> QueuePtr;
  Optional<SIArgument> KernargSegmentPtr;
  Optional<SIArgument> DispatchID;
  Optional<SIArgument> FlatScratchInit;
  Optional<SIArgument> PrivateSegmentSize;

  Optional<SIArgument> WorkGroupIDX;
  Optional<SIArgument> WorkGroupIDY;
  Optional<SIArgument> WorkGroupIDZ;
  Optional<SIArgument> WorkGroupInfo;
  Optional<SIArgument> PrivateSegmentWaveByteOffset;

  Optional<SIArgument> ImplicitArgPtr;
  Optional<SIArgument> ImplicitBufferPtr;

  Optional<SIArgument> WorkItemIDX;
  Optional<SIArgument> WorkItemIDY;
  Optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};

// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;

  SIMode() = default;

  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
  }

  bool operator ==(const SIMode Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
  }
};

struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  Optional<SIArgumentInfo> ArgInfo;
  SIMode Mode;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
                       StringValue("$scratch_wave_offset_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
  }
};
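
// Sketch of how this appears in a .mir file's machineFunctionInfo block; the
// key names come from the mapping above, but the values are invented purely
// for illustration:
//
//   machineFunctionInfo:
//     explicitKernArgSize: 64
//     isEntryFunction:     true
//     scratchRSrcReg:      '$sgpr96_sgpr97_sgpr98_sgpr99'
//     stackPtrOffsetReg:   '$sgpr32'
//     argumentInfo:
//       kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
//       workItemIDX:       { reg: '$vgpr0' }
//     mode:
//       ieee: true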

} // end namespace yaml

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // State of MODE register, assumed FP mode.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;
  unsigned GDSSize;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  struct SpilledReg {
    unsigned VGPR = 0;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR(R), Lane(L) {}

    bool hasLane() { return Lane != -1; }
    bool hasReg() { return VGPR != 0; }
  };
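
  // Example, for illustration only: a 64-bit SGPR pair spilled to VGPR lanes
  // is recorded as two SpilledReg entries, typically sharing one VGPR, e.g.
  //   { VGPR = $vgpr40, Lane = 0 }   // low 32 bits
  //   { VGPR = $vgpr40, Lane = 1 }   // high 32 bits
  // (the specific VGPR is chosen by allocateSGPRSpillToVGPR, not fixed here).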

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
  };

  SparseBitVector<> WWMReservedRegs;

  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

public: // FIXME
  /// If this is set, an SGPR used for save/restore of the register used for the
  /// frame pointer.
  unsigned SGPRForFPSaveRestoreCopy = 0;
  Optional<int> FramePointerSaveIndex;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);

  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }
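
  // For illustration (values invented): if frame index 3 was assigned AGPR
  // lanes by allocateVGPRSpillToAGPR, getVGPRToAGPRSpill(3, 0) returns the
  // AGPR holding lane 0 (e.g. $agpr0); for an unallocated frame index it
  // returns AMDGPU::NoRegister.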

  AMDGPU::SIModeRegisterDefaults getMode() const {
    return Mode;
  }

  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                 unsigned NumLane) const;
  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
  void removeDeadFrameIndices(MachineFrameInfo &MFI);

  bool hasCalculatedTID() const { return TIDReg != 0; };
  unsigned getTIDReg() const { return TIDReg; };
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    auto Arg = ArgInfo.getPreloadedValue(Value).first;
    return Arg ? Arg->getRegister() : Register();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getGDSSize() const {
    return GDSSize;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
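
  // Worked example (illustrative): marking PS inputs 0 and 8 as allocated
  // sets bits 0 and 8, so PSInputAddr == 0x101; isPSInputAllocated(1) is
  // then still false because bit 1 was never set.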

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
    if (!GWSResourcePSV) {
      GWSResourcePSV =
          llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
    }

    return GWSResourcePSV.get();
  }
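
  // Usage sketch (hypothetical values, shown only to illustrate the accessors
  // above): lowering of a buffer access can tag its memory operand with the
  // cached PSV so it is treated as target-specific, non-stack memory:
  //   const AMDGPUBufferPseudoSourceValue *PSV = MFI->getBufferPSV(TII, Rsrc);
  //   auto *MMO = MF.getMachineMemOperand(MachinePointerInfo(PSV),
  //                                       MachineMemOperand::MOLoad, 16, 4);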

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }
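
  // Worked example: with Occupancy == 10, a function flagged as memory bound
  // (or wave limited) reports a minimum allowed occupancy of 4; one that is
  // neither reports the full 10.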

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H