Line data Source code
1 : //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
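11 : /// Declares SIMachineFunctionInfo together with the AMDGPU buffer and image pseudo source value classes it hands out.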
12 : //===----------------------------------------------------------------------===//
13 :
14 : #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 : #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
16 :
17 : #include "AMDGPUArgumentUsageInfo.h"
18 : #include "AMDGPUMachineFunction.h"
19 : #include "SIInstrInfo.h"
20 : #include "SIRegisterInfo.h"
21 : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 : #include "llvm/ADT/ArrayRef.h"
23 : #include "llvm/ADT/DenseMap.h"
24 : #include "llvm/ADT/Optional.h"
25 : #include "llvm/ADT/SmallVector.h"
26 : #include "llvm/CodeGen/PseudoSourceValue.h"
27 : #include "llvm/CodeGen/TargetInstrInfo.h"
28 : #include "llvm/MC/MCRegisterInfo.h"
29 : #include "llvm/Support/ErrorHandling.h"
30 : #include <array>
31 : #include <cassert>
32 : #include <utility>
33 : #include <vector>
34 :
35 : namespace llvm {
36 :
37 : class MachineFrameInfo;
38 : class MachineFunction;
39 : class TargetRegisterClass;
40 :
41 : class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { // Pseudo source value for image resources; instances are created by SIMachineFunctionInfo::getImagePSV().
42 : public:
43 : // TODO: Is the img rsrc useful?
44 709 : explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : // Forwards TII to the base and tags the value as PseudoSourceValue::TargetCustom.
45 709 : PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
46 :
47 4683 : bool isConstant(const MachineFrameInfo *) const override { // The frame-info argument is ignored; the answer is always false.
48 : // This should probably be true for most images, but we will start by being
49 : // conservative.
50 4683 : return false;
51 : }
52 :
53 1305 : bool isAliased(const MachineFrameInfo *) const override { // Unconditionally true; the frame-info argument is ignored.
54 1305 : return true;
55 : }
56 :
57 3 : bool mayAlias(const MachineFrameInfo *) const override { // Unconditionally true; the frame-info argument is ignored.
58 3 : return true;
59 : }
60 : };
61 :
62 : class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { // Pseudo source value for buffer resources; instances are created by SIMachineFunctionInfo::getBufferPSV().
63 : public:
64 1113 : explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) : // Forwards TII to the base and tags the value as PseudoSourceValue::TargetCustom.
65 1113 : PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
66 :
67 3710 : bool isConstant(const MachineFrameInfo *) const override { // The frame-info argument is ignored; the answer is always false.
68 : // This should probably be true for most buffers, but we will start by being
69 : // conservative.
70 3710 : return false;
71 : }
72 :
73 1563 : bool isAliased(const MachineFrameInfo *) const override { // Unconditionally true; the frame-info argument is ignored.
74 1563 : return true;
75 : }
76 :
77 8 : bool mayAlias(const MachineFrameInfo *) const override { // Unconditionally true; the frame-info argument is ignored.
78 8 : return true;
79 : }
80 : };
81 :
82 : /// Per-function SI state: reserved scratch/stack registers, preloaded argument descriptors (ArgInfo), PS input masks, SGPR/VGPR spill bookkeeping and the recorded occupancy.
83 : /// It also keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load (presumably through PSInputAddr -- TODO confirm).
84 : class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
85 : unsigned TIDReg = AMDGPU::NoRegister; // Read and written through getTIDReg()/setTIDReg().
86 :
87 : // Registers that may be reserved for spilling purposes. These may be the same
88 : // as the input registers.
89 : unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
90 : unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
91 :
92 : // This is the current function's incremented size from the kernel's scratch
93 : // wave offset register. For an entry function, this is exactly the same as
94 : // the ScratchWaveOffsetReg.
95 : unsigned FrameOffsetReg = AMDGPU::FP_REG;
96 :
97 : // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
98 : unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
99 :
100 : AMDGPUFunctionArgInfo ArgInfo; // Descriptors filled in by the add*/set* helpers below.
101 :
102 : // Graphics info: bit masks with one bit per PS input index (see markPSInputAllocated / markPSInputEnabled).
103 : unsigned PSInputAddr = 0;
104 : unsigned PSInputEnable = 0;
105 :
106 : /// Number of bytes of arguments this function has on the stack. If the callee
107 : /// is expected to restore the argument stack this should be a multiple of 16,
108 : /// all usable during a tail call.
109 : ///
110 : /// The alternative would forbid tail call optimisation in some cases: if we
111 : /// want to transfer control from a function with 8-bytes of stack-argument
112 : /// space to a function with 16-bytes then misalignment of this value would
113 : /// make a stack adjustment necessary, which could not be undone by the
114 : /// callee.
115 : unsigned BytesInStackArgArea = 0;
116 :
117 : bool ReturnsVoid = true;
118 :
119 : // A pair of default/requested minimum/maximum flat work group sizes.
120 : // Minimum - first, maximum - second.
121 : std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
122 :
123 : // A pair of default/requested minimum/maximum number of waves per execution
124 : // unit. Minimum - first, maximum - second.
125 : std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
126 :
127 : // Stack object indices for work group IDs, indexed by dimension (0..2).
128 : std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
129 :
130 : // Stack object indices for work item IDs, indexed by dimension (0..2).
131 : std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
132 :
133 : DenseMap<const Value *, // Pseudo source values keyed by resource Value; populated by getBufferPSV().
134 : std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
135 : DenseMap<const Value *, // Pseudo source values keyed by resource Value; populated by getImagePSV().
136 : std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
137 :
138 : private: // Redundant: the members above are already private by class default.
139 : unsigned LDSWaveSpillSize = 0;
140 : unsigned NumUserSGPRs = 0;
141 : unsigned NumSystemSGPRs = 0; // Incremented by one in each add* system SGPR helper below.
142 :
143 : bool HasSpilledSGPRs = false;
144 : bool HasSpilledVGPRs = false;
145 : bool HasNonSpillStackObjects = false;
146 : bool IsStackRealigned = false;
147 :
148 : unsigned NumSpilledSGPRs = 0;
149 : unsigned NumSpilledVGPRs = 0;
150 :
151 : // Feature bits required for inputs passed in user SGPRs. None of the bit-fields below has an in-class initializer; presumably the constructor sets them -- TODO confirm.
152 : bool PrivateSegmentBuffer : 1;
153 : bool DispatchPtr : 1;
154 : bool QueuePtr : 1;
155 : bool KernargSegmentPtr : 1;
156 : bool DispatchID : 1;
157 : bool FlatScratchInit : 1;
158 :
159 : // Feature bits required for inputs passed in system SGPRs.
160 : bool WorkGroupIDX : 1; // Always initialized.
161 : bool WorkGroupIDY : 1;
162 : bool WorkGroupIDZ : 1;
163 : bool WorkGroupInfo : 1;
164 : bool PrivateSegmentWaveByteOffset : 1;
165 :
166 : bool WorkItemIDX : 1; // Always initialized.
167 : bool WorkItemIDY : 1;
168 : bool WorkItemIDZ : 1;
169 :
170 : // Private memory buffer
171 : // Compute directly in sgpr[0:1]
172 : // Other shaders indirect 64-bits at sgpr[0:1]
173 : bool ImplicitBufferPtr : 1;
174 :
175 : // Pointer to where the ABI inserts special kernel arguments separate from the
176 : // user arguments. This is an offset from the KernargSegmentPtr.
177 : bool ImplicitArgPtr : 1;
178 :
179 : // The hard-wired high half of the address of the global information table
180 : // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
181 : // current hardware only allows a 16 bit value.
182 : unsigned GITPtrHigh; // No in-class initializer; presumably set by the constructor -- TODO confirm.
183 :
184 : unsigned HighBitsOf32BitAddress; // Returned by get32BitAddressHighBits(); no in-class initializer.
185 :
186 : // Current recorded maximum possible occupancy. No in-class initializer; presumably set by the constructor -- TODO confirm.
187 : unsigned Occupancy;
188 :
189 : MCPhysReg getNextUserSGPR() const;
190 :
191 : MCPhysReg getNextSystemSGPR() const; // Used by the add* system SGPR helpers to pick the register to assign.
192 :
193 : public:
194 : struct SpilledReg { // One (VGPR, lane) slot; the defaults VGPR == 0 and Lane == -1 mean "unset".
195 : unsigned VGPR = 0;
196 : int Lane = -1;
197 :
198 : SpilledReg() = default;
199 1171 : SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
200 :
201 : bool hasLane() { return Lane != -1;} // NOTE(review): does not modify the object, so it could be const-qualified.
202 : bool hasReg() { return VGPR != 0;} // NOTE(review): does not modify the object, so it could be const-qualified.
203 : };
204 :
205 148 : struct SGPRSpillVGPRCSR {
206 : // VGPR used for SGPR spills
207 : unsigned VGPR;
208 :
209 : // If the VGPR is a CSR, the stack slot used to save/restore it in the
210 : // prolog/epilog.
211 : Optional<int> FI;
212 :
213 146 : SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
214 : };
215 :
216 : private:
217 : // SGPR->VGPR spilling support.
218 : using SpillRegMask = std::pair<unsigned, unsigned>; // NOTE(review): not referenced anywhere else in this header.
219 :
220 : // Track VGPR + wave index for each subregister of the SGPR spilled to
221 : // frameindex key.
222 : DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
223 : unsigned NumVGPRSpillLanes = 0;
224 : SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
225 :
226 : public:
227 : SIMachineFunctionInfo(const MachineFunction &MF);
228 :
229 1370 : ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { // Returns the slots recorded for FrameIndex, or an empty ArrayRef when there is no entry.
230 1370 : auto I = SGPRToVGPRSpills.find(FrameIndex);
231 : return (I == SGPRToVGPRSpills.end()) ?
232 1370 : ArrayRef<SpilledReg>() : makeArrayRef(I->second);
233 : }
234 :
235 : ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
236 : return SpillVGPRs;
237 : }
238 :
239 : bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
240 : void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
241 :
242 0 : bool hasCalculatedTID() const { return TIDReg != 0; }; // NOTE(review): tests against literal 0 although TIDReg defaults to AMDGPU::NoRegister (presumably 0 -- confirm); the ';' after the body is redundant.
243 0 : unsigned getTIDReg() const { return TIDReg; }; // NOTE(review): the ';' after the body is redundant.
244 0 : void setTIDReg(unsigned Reg) { TIDReg = Reg; }
245 :
246 0 : unsigned getBytesInStackArgArea() const {
247 0 : return BytesInStackArgArea;
248 : }
249 :
250 0 : void setBytesInStackArgArea(unsigned Bytes) {
251 19709 : BytesInStackArgArea = Bytes;
252 0 : }
253 :
254 : // Add user SGPRs. (Defined out of line.)
255 : unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
256 : unsigned addDispatchPtr(const SIRegisterInfo &TRI);
257 : unsigned addQueuePtr(const SIRegisterInfo &TRI);
258 : unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
259 : unsigned addDispatchID(const SIRegisterInfo &TRI);
260 : unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
261 : unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
262 :
263 : // Add system SGPRs. Each helper records getNextSystemSGPR() in ArgInfo, bumps NumSystemSGPRs and returns the chosen register.
264 : unsigned addWorkGroupIDX() {
265 16213 : ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
266 16213 : NumSystemSGPRs += 1;
267 : return ArgInfo.WorkGroupIDX.getRegister();
268 : }
269 :
270 : unsigned addWorkGroupIDY() {
271 24 : ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
272 24 : NumSystemSGPRs += 1;
273 : return ArgInfo.WorkGroupIDY.getRegister();
274 : }
275 :
276 : unsigned addWorkGroupIDZ() {
277 24 : ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
278 24 : NumSystemSGPRs += 1;
279 : return ArgInfo.WorkGroupIDZ.getRegister();
280 : }
281 :
282 : unsigned addWorkGroupInfo() {
283 0 : ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
284 0 : NumSystemSGPRs += 1;
285 : return ArgInfo.WorkGroupInfo.getRegister();
286 : }
287 :
288 : // Add special VGPR inputs. These setters only store the given descriptor in ArgInfo.
289 : void setWorkItemIDX(ArgDescriptor Arg) {
290 0 : ArgInfo.WorkItemIDX = Arg;
291 : }
292 :
293 : void setWorkItemIDY(ArgDescriptor Arg) {
294 0 : ArgInfo.WorkItemIDY = Arg;
295 : }
296 :
297 : void setWorkItemIDZ(ArgDescriptor Arg) {
298 0 : ArgInfo.WorkItemIDZ = Arg;
299 : }
300 :
301 : unsigned addPrivateSegmentWaveByteOffset() { // Same pattern as the add* system SGPR helpers above.
302 : ArgInfo.PrivateSegmentWaveByteOffset
303 16213 : = ArgDescriptor::createRegister(getNextSystemSGPR());
304 16213 : NumSystemSGPRs += 1;
305 : return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
306 : }
307 :
308 : void setPrivateSegmentWaveByteOffset(unsigned Reg) { // Overrides the descriptor with Reg; NumSystemSGPRs is left unchanged.
309 45 : ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
310 : }
311 :
312 : bool hasPrivateSegmentBuffer() const { // The has* accessors below simply return the corresponding feature bit.
313 20574 : return PrivateSegmentBuffer;
314 : }
315 :
316 : bool hasDispatchPtr() const {
317 23100 : return DispatchPtr;
318 : }
319 :
320 : bool hasQueuePtr() const {
321 20574 : return QueuePtr;
322 : }
323 :
324 : bool hasKernargSegmentPtr() const {
325 20574 : return KernargSegmentPtr;
326 : }
327 :
328 : bool hasDispatchID() const {
329 20574 : return DispatchID;
330 : }
331 :
332 : bool hasFlatScratchInit() const {
333 184810 : return FlatScratchInit;
334 : }
335 :
336 : bool hasWorkGroupIDX() const {
337 35910 : return WorkGroupIDX;
338 : }
339 :
340 : bool hasWorkGroupIDY() const {
341 35910 : return WorkGroupIDY;
342 : }
343 :
344 : bool hasWorkGroupIDZ() const {
345 35910 : return WorkGroupIDZ;
346 : }
347 :
348 : bool hasWorkGroupInfo() const {
349 35910 : return WorkGroupInfo;
350 : }
351 :
352 : bool hasPrivateSegmentWaveByteOffset() const {
353 17950 : return PrivateSegmentWaveByteOffset;
354 : }
355 :
356 : bool hasWorkItemIDX() const {
357 0 : return WorkItemIDX;
358 : }
359 :
360 : bool hasWorkItemIDY() const {
361 17884 : return WorkItemIDY;
362 : }
363 :
364 : bool hasWorkItemIDZ() const {
365 17960 : return WorkItemIDZ;
366 : }
367 :
368 : bool hasImplicitArgPtr() const {
369 0 : return ImplicitArgPtr;
370 : }
371 :
372 : bool hasImplicitBufferPtr() const {
373 18435 : return ImplicitBufferPtr;
374 : }
375 :
376 : AMDGPUFunctionArgInfo &getArgInfo() { // Mutable access to the argument descriptors.
377 19709 : return ArgInfo;
378 : }
379 :
380 : const AMDGPUFunctionArgInfo &getArgInfo() const {
381 575 : return ArgInfo;
382 : }
383 :
384 : std::pair<const ArgDescriptor *, const TargetRegisterClass *>
385 : getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { // Thin forwarder to ArgInfo.getPreloadedValue().
386 54901 : return ArgInfo.getPreloadedValue(Value);
387 : }
388 :
389 : unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { // Dereferences the returned descriptor pointer without a null check.
390 2436 : return ArgInfo.getPreloadedValue(Value).first->getRegister();
391 : }
392 :
393 0 : unsigned getGITPtrHigh() const {
394 0 : return GITPtrHigh;
395 : }
396 :
397 0 : unsigned get32BitAddressHighBits() const {
398 0 : return HighBitsOf32BitAddress;
399 : }
400 :
401 0 : unsigned getNumUserSGPRs() const {
402 0 : return NumUserSGPRs;
403 : }
404 :
405 0 : unsigned getNumPreloadedSGPRs() const { // Sum of the user and system SGPR counts.
406 1308 : return NumUserSGPRs + NumSystemSGPRs;
407 : }
408 :
409 : unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
410 50 : return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
411 : }
412 :
413 : /// Returns the physical register reserved for use as the resource
414 : /// descriptor for scratch accesses.
415 0 : unsigned getScratchRSrcReg() const {
416 0 : return ScratchRSrcReg;
417 : }
418 :
419 0 : void setScratchRSrcReg(unsigned Reg) {
420 : assert(Reg != 0 && "Should never be unset");
421 16330 : ScratchRSrcReg = Reg;
422 0 : }
423 :
424 0 : unsigned getScratchWaveOffsetReg() const {
425 0 : return ScratchWaveOffsetReg;
426 : }
427 :
428 0 : unsigned getFrameOffsetReg() const {
429 0 : return FrameOffsetReg;
430 : }
431 :
432 0 : void setStackPtrOffsetReg(unsigned Reg) {
433 : assert(Reg != 0 && "Should never be unset");
434 2193 : StackPtrOffsetReg = Reg;
435 0 : }
436 :
437 : // Note the unset value for this is AMDGPU::SP_REG rather than
438 : // NoRegister. This is mostly a workaround for MIR tests where state that
439 : // can't be directly computed from the function is not preserved in serialized
440 : // MIR.
441 0 : unsigned getStackPtrOffsetReg() const {
442 0 : return StackPtrOffsetReg;
443 : }
444 :
445 : void setScratchWaveOffsetReg(unsigned Reg) {
446 : assert(Reg != 0 && "Should never be unset");
447 18512 : ScratchWaveOffsetReg = Reg;
448 18512 : if (isEntryFunction()) // Entry functions use the same register as their frame offset register.
449 18512 : FrameOffsetReg = ScratchWaveOffsetReg;
450 : }
451 :
452 : unsigned getQueuePtrUserSGPR() const {
453 26 : return ArgInfo.QueuePtr.getRegister();
454 : }
455 :
456 : unsigned getImplicitBufferPtrUserSGPR() const {
457 2 : return ArgInfo.ImplicitBufferPtr.getRegister();
458 : }
459 :
460 0 : bool hasSpilledSGPRs() const {
461 0 : return HasSpilledSGPRs;
462 : }
463 :
464 : void setHasSpilledSGPRs(bool Spill = true) {
465 702 : HasSpilledSGPRs = Spill;
466 : }
467 :
468 0 : bool hasSpilledVGPRs() const {
469 0 : return HasSpilledVGPRs;
470 : }
471 :
472 : void setHasSpilledVGPRs(bool Spill = true) {
473 1240 : HasSpilledVGPRs = Spill;
474 : }
475 :
476 0 : bool hasNonSpillStackObjects() const {
477 0 : return HasNonSpillStackObjects;
478 : }
479 :
480 : void setHasNonSpillStackObjects(bool StackObject = true) {
481 418 : HasNonSpillStackObjects = StackObject;
482 : }
483 :
484 0 : bool isStackRealigned() const {
485 0 : return IsStackRealigned;
486 : }
487 :
488 : void setIsStackRealigned(bool Realigned = true) {
489 3 : IsStackRealigned = Realigned;
490 : }
491 :
492 0 : unsigned getNumSpilledSGPRs() const {
493 0 : return NumSpilledSGPRs;
494 : }
495 :
496 0 : unsigned getNumSpilledVGPRs() const {
497 0 : return NumSpilledVGPRs;
498 : }
499 :
500 0 : void addToSpilledSGPRs(unsigned num) { // Accumulates into the running SGPR spill count.
501 690 : NumSpilledSGPRs += num;
502 0 : }
503 :
504 0 : void addToSpilledVGPRs(unsigned num) { // Accumulates into the running VGPR spill count.
505 1316 : NumSpilledVGPRs += num;
506 0 : }
507 :
508 0 : unsigned getPSInputAddr() const {
509 0 : return PSInputAddr;
510 : }
511 :
512 0 : unsigned getPSInputEnable() const {
513 0 : return PSInputEnable;
514 : }
515 :
516 0 : bool isPSInputAllocated(unsigned Index) const { // Tests bit Index of PSInputAddr.
517 4 : return PSInputAddr & (1 << Index); // NOTE(review): `1 << Index` shifts a signed int; with a 32-bit int, Index == 31 hits the sign bit and Index >= 32 is undefined -- `1u << Index` would be safer.
518 : }
519 :
520 0 : void markPSInputAllocated(unsigned Index) { // Sets bit Index of PSInputAddr.
521 1 : PSInputAddr |= 1 << Index; // NOTE(review): same signed-shift concern as in isPSInputAllocated().
522 0 : }
523 :
524 0 : void markPSInputEnabled(unsigned Index) { // Sets bit Index of PSInputEnable.
525 303 : PSInputEnable |= 1 << Index; // NOTE(review): same signed-shift concern as in isPSInputAllocated().
526 0 : }
527 :
528 0 : bool returnsVoid() const {
529 0 : return ReturnsVoid;
530 : }
531 :
532 : void setIfReturnsVoid(bool Value) { // Despite the "If" in the name, this assigns unconditionally.
533 3450 : ReturnsVoid = Value;
534 : }
535 :
536 : /// \returns A pair of default/requested minimum/maximum flat work group sizes
537 : /// for this function.
538 : std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
539 : return FlatWorkGroupSizes;
540 : }
541 :
542 : /// \returns Default/requested minimum flat work group size for this function.
543 : unsigned getMinFlatWorkGroupSize() const {
544 : return FlatWorkGroupSizes.first;
545 : }
546 :
547 : /// \returns Default/requested maximum flat work group size for this function.
548 0 : unsigned getMaxFlatWorkGroupSize() const {
549 0 : return FlatWorkGroupSizes.second;
550 : }
551 :
552 : /// \returns A pair of default/requested minimum/maximum number of waves per
553 : /// execution unit.
554 0 : std::pair<unsigned, unsigned> getWavesPerEU() const {
555 0 : return WavesPerEU;
556 : }
557 :
558 : /// \returns Default/requested minimum number of waves per execution unit.
559 : unsigned getMinWavesPerEU() const {
560 : return WavesPerEU.first;
561 : }
562 :
563 : /// \returns Default/requested maximum number of waves per execution unit.
564 0 : unsigned getMaxWavesPerEU() const {
565 0 : return WavesPerEU.second;
566 : }
567 :
568 : /// \returns Stack object index for \p Dim's work group ID.
569 : int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
570 : assert(Dim < 3);
571 12 : return DebuggerWorkGroupIDStackObjectIndices[Dim];
572 : }
573 :
574 : /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
575 : void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
576 : assert(Dim < 3);
577 12 : DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
578 : }
579 :
580 : /// \returns Stack object index for \p Dim's work item ID.
581 : int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
582 : assert(Dim < 3);
583 12 : return DebuggerWorkItemIDStackObjectIndices[Dim];
584 : }
585 :
586 : /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
587 : void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
588 : assert(Dim < 3);
589 12 : DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
590 : }
591 :
592 : /// \returns SGPR used for \p Dim's work group ID (0 = X, 1 = Y, 2 = Z).
593 : unsigned getWorkGroupIDSGPR(unsigned Dim) const {
594 12 : switch (Dim) {
595 4 : case 0:
596 : assert(hasWorkGroupIDX());
597 4 : return ArgInfo.WorkGroupIDX.getRegister();
598 4 : case 1:
599 : assert(hasWorkGroupIDY());
600 4 : return ArgInfo.WorkGroupIDY.getRegister();
601 4 : case 2:
602 : assert(hasWorkGroupIDZ());
603 4 : return ArgInfo.WorkGroupIDZ.getRegister();
604 : }
605 0 : llvm_unreachable("unexpected dimension"); // Reached only for Dim outside 0..2.
606 : }
607 :
608 : /// \returns VGPR used for \p Dim's work item ID.
609 : unsigned getWorkItemIDVGPR(unsigned Dim) const;
610 :
611 0 : unsigned getLDSWaveSpillSize() const {
612 0 : return LDSWaveSpillSize;
613 : }
614 :
615 1113 : const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, // Returns the pseudo source value stored in BufferPSVs for BufferRsrc; the map keeps ownership.
616 : const Value *BufferRsrc) {
617 : assert(BufferRsrc);
618 1113 : auto PSV = BufferPSVs.try_emplace(
619 : BufferRsrc,
620 1113 : llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); // NOTE(review): make_unique is evaluated on every call; when the key already exists the new object is presumably discarded by try_emplace -- confirm, and consider looking the key up first.
621 2226 : return PSV.first->second.get();
622 : }
623 :
624 709 : const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, // Returns the pseudo source value stored in ImagePSVs for ImgRsrc; the map keeps ownership.
625 : const Value *ImgRsrc) {
626 : assert(ImgRsrc);
627 709 : auto PSV = ImagePSVs.try_emplace(
628 : ImgRsrc,
629 709 : llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); // NOTE(review): same per-call allocation concern as in getBufferPSV().
630 1418 : return PSV.first->second.get();
631 : }
632 :
633 0 : unsigned getOccupancy() const {
634 0 : return Occupancy;
635 : }
636 :
637 : unsigned getMinAllowedOccupancy() const { // Occupancy itself unless the function is memory bound or needs the wave limiter; in that case the result is capped at 4.
638 41 : if (!isMemoryBound() && !needsWaveLimiter())
639 9 : return Occupancy;
640 32 : return (Occupancy < 4) ? Occupancy : 4;
641 : }
642 :
643 : void limitOccupancy(const MachineFunction &MF);
644 :
645 0 : void limitOccupancy(unsigned Limit) { // Lowers Occupancy to Limit; never raises it.
646 81145 : if (Occupancy > Limit)
647 300 : Occupancy = Limit;
648 0 : }
649 :
650 : void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { // Raises Occupancy to at least Limit, then re-applies limitOccupancy(MF).
651 2 : if (Occupancy < Limit)
652 0 : Occupancy = Limit;
653 2 : limitOccupancy(MF);
654 : }
655 : };
656 :
657 : } // end namespace llvm
658 :
659 : #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|