1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "MCTargetDesc/AMDGPUInstPrinter.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/LiveIntervals.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/CodeGen/SlotIndexes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/LLVMContext.h"
29 
30 using namespace llvm;
31 
32 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
33  for (unsigned i = 0; PSets[i] != -1; ++i) {
34  if (PSets[i] == (int)PSetID)
35  return true;
36  }
37  return false;
38 }
39 
40 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
41  BitVector &PressureSets) const {
42  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
43  const int *PSets = getRegUnitPressureSets(*U);
44  if (hasPressureSet(PSets, PSetID)) {
45  PressureSets.set(PSetID);
46  break;
47  }
48  }
49 }
50 
51 static cl::opt<bool> EnableSpillSGPRToSMEM(
52  "amdgpu-spill-sgpr-to-smem",
53  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
54  cl::init(false));
55 
56 static cl::opt<bool> EnableSpillSGPRToVGPR(
57  "amdgpu-spill-sgpr-to-vgpr",
58  cl::desc("Enable spilling VGPRs to SGPRs"),
59  cl::ReallyHidden,
60  cl::init(true));
61 
62 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
63  AMDGPURegisterInfo(),
64  SGPRPressureSets(getNumRegPressureSets()),
65  VGPRPressureSets(getNumRegPressureSets()),
66  AGPRPressureSets(getNumRegPressureSets()),
67  SpillSGPRToVGPR(false),
68  SpillSGPRToSMEM(false),
69  isWave32(ST.isWave32()) {
70  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
71  SpillSGPRToSMEM = true;
72  else if (EnableSpillSGPRToVGPR)
73  SpillSGPRToVGPR = true;
74 
75  unsigned NumRegPressureSets = getNumRegPressureSets();
76 
77  SGPRSetID = NumRegPressureSets;
78  VGPRSetID = NumRegPressureSets;
79  AGPRSetID = NumRegPressureSets;
80 
81  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
82  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
83  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
84  classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
85  }
86 
87  // Determine the number of reg units for each pressure set.
88  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
89  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
90  const int *PSets = getRegUnitPressureSets(i);
91  for (unsigned j = 0; PSets[j] != -1; ++j) {
92  ++PressureSetRegUnits[PSets[j]];
93  }
94  }
95 
96  unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
97  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
98  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
99  VGPRSetID = i;
100  VGPRMax = PressureSetRegUnits[i];
101  continue;
102  }
103  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
104  SGPRSetID = i;
105  SGPRMax = PressureSetRegUnits[i];
106  }
107  if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
108  AGPRSetID = i;
109  AGPRMax = PressureSetRegUnits[i];
110  continue;
111  }
112  }
113 
114  assert(SGPRSetID < NumRegPressureSets &&
115  VGPRSetID < NumRegPressureSets &&
116  AGPRSetID < NumRegPressureSets);
117 }
118 
119 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
120  const MachineFunction &MF) const {
121 
122  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
123  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
124  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
125  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
126 }
127 
128 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
129  unsigned Reg;
130 
131  // Try to place it in a hole after PrivateSegmentBufferReg.
132  if (RegCount & 3) {
133  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
134  // alignment constraints, so we have a hole where we can put the wave offset.
135  Reg = RegCount - 1;
136  } else {
137  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
138  // wave offset before it.
139  Reg = RegCount - 5;
140  }
141 
142  return Reg;
143 }
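// A worked example of the rule above (a sketch; the register counts are
// illustrative, not taken from a real subtarget):
//
//   RegCount = 102: the buffer rsrc is reserved at alignDown(102, 4) - 4 =
//                   SGPR96..SGPR99, leaving a hole above it, so the wave
//                   byte offset goes in RegCount - 1 = SGPR101.
//   RegCount = 104: the buffer rsrc occupies SGPR100..SGPR103 with no hole,
//                   so the wave byte offset goes just below it, in
//                   RegCount - 5 = SGPR99.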
144 
145 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
146  const MachineFunction &MF) const {
147  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
148  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
149  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
150 }
151 
152 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
153  BitVector Reserved(getNumRegs());
154 
155  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
156  // this seems likely to result in bugs, so I'm marking them as reserved.
157  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
158  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
159 
160  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
161  reserveRegisterTuples(Reserved, AMDGPU::M0);
162 
163  // Reserve src_vccz, src_execz, src_scc.
164  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
165  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
166  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
167 
168  // Reserve the memory aperture registers.
169  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
170  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
171  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
172  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
173 
174  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
175  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
176 
177  // Reserve xnack_mask registers - support is not implemented in Codegen.
178  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
179 
180  // Reserve lds_direct register - support is not implemented in Codegen.
181  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
182 
183  // Reserve Trap Handler registers - support is not implemented in Codegen.
184  reserveRegisterTuples(Reserved, AMDGPU::TBA);
185  reserveRegisterTuples(Reserved, AMDGPU::TMA);
186  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
187  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
188  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
189  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
190  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
191  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
192  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
193  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
194 
195  // Reserve null register - it shall never be allocated
196  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
197 
198  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
199  // will result in bugs.
200  if (isWave32) {
201  Reserved.set(AMDGPU::VCC);
202  Reserved.set(AMDGPU::VCC_HI);
203  }
204 
205  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
206 
207  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
208  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
209  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
210  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
211  reserveRegisterTuples(Reserved, Reg);
212  }
213 
214  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
215  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
216  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
217  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
218  reserveRegisterTuples(Reserved, Reg);
219  Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
220  reserveRegisterTuples(Reserved, Reg);
221  }
222 
223  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
224 
225  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
226  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
227  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
228  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
229  }
230 
231  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
232  if (ScratchRSrcReg != AMDGPU::NoRegister) {
233  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
234  // to spill.
235  // TODO: May need to reserve a VGPR if doing LDS spilling.
236  reserveRegisterTuples(Reserved, ScratchRSrcReg);
237  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
238  }
239 
240  // We have to assume the SP is needed in case there are calls in the function,
241  // which is detected after the function is lowered. If we aren't really going
242  // to need SP, don't bother reserving it.
243  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
244 
245  if (StackPtrReg != AMDGPU::NoRegister) {
246  reserveRegisterTuples(Reserved, StackPtrReg);
247  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
248  }
249 
250  unsigned FrameReg = MFI->getFrameOffsetReg();
251  if (FrameReg != AMDGPU::NoRegister) {
252  reserveRegisterTuples(Reserved, FrameReg);
253  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
254  }
255 
256  for (unsigned Reg : MFI->WWMReservedRegs) {
257  reserveRegisterTuples(Reserved, Reg);
258  }
259 
260  // FIXME: Stop using reserved registers for this.
261  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
262  reserveRegisterTuples(Reserved, Reg);
263 
264  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
265  reserveRegisterTuples(Reserved, Reg);
266 
267  return Reserved;
268 }
269 
270 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
271  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
272  // On entry, the base address is 0, so it can't possibly need any more
273  // alignment.
274 
275  // FIXME: Should be able to specify the entry frame alignment per calling
276  // convention instead.
277  if (Info->isEntryFunction())
278  return false;
279 
280  return TargetRegisterInfo::canRealignStack(MF);
281 }
282 
283 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
284  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
285  if (Info->isEntryFunction()) {
286  const MachineFrameInfo &MFI = Fn.getFrameInfo();
287  return MFI.hasStackObjects() || MFI.hasCalls();
288  }
289 
290  // May need scavenger for dealing with callee saved registers.
291  return true;
292 }
293 
294 bool SIRegisterInfo::requiresFrameIndexScavenging(
295  const MachineFunction &MF) const {
296  const MachineFrameInfo &MFI = MF.getFrameInfo();
297  if (MFI.hasStackObjects())
298  return true;
299 
300  // May need to deal with callee saved registers.
301  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
302  return !Info->isEntryFunction();
303 }
304 
305 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
306  const MachineFunction &MF) const {
307  const MachineFrameInfo &MFI = MF.getFrameInfo();
308  if (!MFI.hasStackObjects())
309  return false;
310 
311  // The scavenger is used for large frames which may require finding a free
312  // register for large offsets.
313  if (!isUInt<12>(MFI.getStackSize()))
314  return true;
315 
316  // When using scalar stores for spills, m0 is needed for the scalar store
317  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
318  // register for it during frame index elimination, so the scavenger is
319  // directly needed.
320  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
321  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
322 }
323 
324 bool SIRegisterInfo::requiresVirtualBaseRegisters(
325  const MachineFunction &) const {
326  // There are no special dedicated stack or frame pointers.
327  return true;
328 }
329 
330 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
331  // This helps catch bugs as verifier errors.
332  return true;
333 }
334 
335 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
336  assert(SIInstrInfo::isMUBUF(*MI));
337 
338  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
339  AMDGPU::OpName::offset);
340  return MI->getOperand(OffIdx).getImm();
341 }
342 
343 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
344  int Idx) const {
345  if (!SIInstrInfo::isMUBUF(*MI))
346  return 0;
347 
348  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
349  AMDGPU::OpName::vaddr) &&
350  "Should never see frame index on non-address operand");
351 
352  return getMUBUFInstrOffset(MI);
353 }
354 
355 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
356  if (!MI->mayLoadOrStore())
357  return false;
358 
359  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
360 
361  return !isUInt<12>(FullOffset);
362 }
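// A minimal sketch of why the isUInt<12> test above decides legality: the
// MUBUF immediate offset field is an unsigned 12-bit value, so any combined
// offset of 4096 or more needs a materialized frame base register. The
// numbers below are illustrative:
//
//   int64_t FullOffset = 4092 /*Offset*/ + 8 /*existing MUBUF offset*/;
//   bool NeedsBaseReg = !isUInt<12>(FullOffset); // 4100 > 4095 -> true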
363 
364 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
365  unsigned BaseReg,
366  int FrameIdx,
367  int64_t Offset) const {
368  MachineBasicBlock::iterator Ins = MBB->begin();
369  DebugLoc DL; // Defaults to "unknown"
370 
371  if (Ins != MBB->end())
372  DL = Ins->getDebugLoc();
373 
374  MachineFunction *MF = MBB->getParent();
375  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
376  const SIInstrInfo *TII = Subtarget.getInstrInfo();
377 
378  if (Offset == 0) {
379  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
380  .addFrameIndex(FrameIdx);
381  return;
382  }
383 
384  MachineRegisterInfo &MRI = MF->getRegInfo();
385  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
386 
387  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
388 
389  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
390  .addImm(Offset);
391  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
392  .addFrameIndex(FrameIdx);
393 
394  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
395  .addReg(OffsetReg, RegState::Kill)
396  .addReg(FIReg)
397  .addImm(0); // clamp bit
398 }
399 
400 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
401  int64_t Offset) const {
402 
403  MachineBasicBlock *MBB = MI.getParent();
404  MachineFunction *MF = MBB->getParent();
405  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
406  const SIInstrInfo *TII = Subtarget.getInstrInfo();
407 
408 #ifndef NDEBUG
409  // FIXME: Is it possible to be storing a frame index to itself?
410  bool SeenFI = false;
411  for (const MachineOperand &MO: MI.operands()) {
412  if (MO.isFI()) {
413  if (SeenFI)
414  llvm_unreachable("should not see multiple frame indices");
415 
416  SeenFI = true;
417  }
418  }
419 #endif
420 
421  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
422  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
423  assert(TII->isMUBUF(MI));
424  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
425  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
426  "should only be seeing frame offset relative FrameIndex");
427 
428 
429  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
430  int64_t NewOffset = OffsetOp->getImm() + Offset;
431  assert(isUInt<12>(NewOffset) && "offset should be legal");
432 
433  FIOp->ChangeToRegister(BaseReg, false);
434  OffsetOp->setImm(NewOffset);
435 }
436 
437 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
438  unsigned BaseReg,
439  int64_t Offset) const {
440  if (!SIInstrInfo::isMUBUF(*MI))
441  return false;
442 
443  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
444 
445  return isUInt<12>(NewOffset);
446 }
447 
448 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
449  const MachineFunction &MF, unsigned Kind) const {
450  // This is inaccurate. It depends on the instruction and address space. The
451  // only place where we should hit this is for dealing with frame indexes /
452  // private accesses, so this is correct in that case.
453  return &AMDGPU::VGPR_32RegClass;
454 }
455 
456 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
457 
458  switch (Op) {
459  case AMDGPU::SI_SPILL_S1024_SAVE:
460  case AMDGPU::SI_SPILL_S1024_RESTORE:
461  case AMDGPU::SI_SPILL_V1024_SAVE:
462  case AMDGPU::SI_SPILL_V1024_RESTORE:
463  case AMDGPU::SI_SPILL_A1024_SAVE:
464  case AMDGPU::SI_SPILL_A1024_RESTORE:
465  return 32;
466  case AMDGPU::SI_SPILL_S512_SAVE:
467  case AMDGPU::SI_SPILL_S512_RESTORE:
468  case AMDGPU::SI_SPILL_V512_SAVE:
469  case AMDGPU::SI_SPILL_V512_RESTORE:
470  case AMDGPU::SI_SPILL_A512_SAVE:
471  case AMDGPU::SI_SPILL_A512_RESTORE:
472  return 16;
473  case AMDGPU::SI_SPILL_S256_SAVE:
474  case AMDGPU::SI_SPILL_S256_RESTORE:
475  case AMDGPU::SI_SPILL_V256_SAVE:
476  case AMDGPU::SI_SPILL_V256_RESTORE:
477  return 8;
478  case AMDGPU::SI_SPILL_S160_SAVE:
479  case AMDGPU::SI_SPILL_S160_RESTORE:
480  case AMDGPU::SI_SPILL_V160_SAVE:
481  case AMDGPU::SI_SPILL_V160_RESTORE:
482  return 5;
483  case AMDGPU::SI_SPILL_S128_SAVE:
484  case AMDGPU::SI_SPILL_S128_RESTORE:
485  case AMDGPU::SI_SPILL_V128_SAVE:
486  case AMDGPU::SI_SPILL_V128_RESTORE:
487  case AMDGPU::SI_SPILL_A128_SAVE:
488  case AMDGPU::SI_SPILL_A128_RESTORE:
489  return 4;
490  case AMDGPU::SI_SPILL_S96_SAVE:
491  case AMDGPU::SI_SPILL_S96_RESTORE:
492  case AMDGPU::SI_SPILL_V96_SAVE:
493  case AMDGPU::SI_SPILL_V96_RESTORE:
494  return 3;
495  case AMDGPU::SI_SPILL_S64_SAVE:
496  case AMDGPU::SI_SPILL_S64_RESTORE:
497  case AMDGPU::SI_SPILL_V64_SAVE:
498  case AMDGPU::SI_SPILL_V64_RESTORE:
499  case AMDGPU::SI_SPILL_A64_SAVE:
500  case AMDGPU::SI_SPILL_A64_RESTORE:
501  return 2;
502  case AMDGPU::SI_SPILL_S32_SAVE:
503  case AMDGPU::SI_SPILL_S32_RESTORE:
504  case AMDGPU::SI_SPILL_V32_SAVE:
505  case AMDGPU::SI_SPILL_V32_RESTORE:
506  case AMDGPU::SI_SPILL_A32_SAVE:
507  case AMDGPU::SI_SPILL_A32_RESTORE:
508  return 1;
509  default: llvm_unreachable("Invalid spill opcode");
510  }
511 }
512 
513 static int getOffsetMUBUFStore(unsigned Opc) {
514  switch (Opc) {
515  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
516  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
517  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
518  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
519  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
520  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
521  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
522  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
523  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
524  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
525  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
526  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
527  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
528  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
529  default:
530  return -1;
531  }
532 }
533 
534 static int getOffsetMUBUFLoad(unsigned Opc) {
535  switch (Opc) {
536  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
537  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
538  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
539  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
540  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
541  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
542  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
543  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
544  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
545  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
546  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
547  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
548  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
549  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
550  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
551  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
552  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
553  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
554  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
555  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
556  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
557  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
558  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
559  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
560  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
561  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
562  default:
563  return -1;
564  }
565 }
566 
567 static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
568  int Index,
569  unsigned Lane,
570  unsigned ValueReg,
571  bool IsKill) {
572  MachineBasicBlock *MBB = MI->getParent();
573  MachineFunction *MF = MI->getParent()->getParent();
574  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
575  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
576  const SIInstrInfo *TII = ST.getInstrInfo();
577 
578  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
579 
580  if (Reg == AMDGPU::NoRegister)
581  return MachineInstrBuilder();
582 
583  bool IsStore = MI->mayStore();
584  const MachineRegisterInfo &MRI = MF->getRegInfo();
585  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
586 
587  unsigned Dst = IsStore ? Reg : ValueReg;
588  unsigned Src = IsStore ? ValueReg : Reg;
589  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
590  : AMDGPU::V_ACCVGPR_READ_B32;
591 
592  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
593  .addReg(Src, getKillRegState(IsKill));
594 }
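// The XOR above picks the direction of the AGPR<->VGPR copy: the destination
// is the AGPR side exactly when (IsStore ^ isVGPR(Reg)) is true.
//
//   IsStore  isVGPR(Reg)  opcode                meaning
//   true     false        V_ACCVGPR_WRITE_B32   spill a VGPR value into an AGPR
//   true     true         V_ACCVGPR_READ_B32    spill an AGPR value into a VGPR
//   false    false        V_ACCVGPR_READ_B32    reload a VGPR value from an AGPR
//   false    true         V_ACCVGPR_WRITE_B32   reload an AGPR value from a VGPR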
595 
596 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
597 // need to handle the case where an SGPR may need to be spilled while spilling.
598 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
599  MachineFrameInfo &MFI,
600  MachineBasicBlock::iterator MI,
601  int Index,
602  int64_t Offset) {
603  MachineBasicBlock *MBB = MI->getParent();
604  const DebugLoc &DL = MI->getDebugLoc();
605  bool IsStore = MI->mayStore();
606 
607  unsigned Opc = MI->getOpcode();
608  int LoadStoreOp = IsStore ?
609  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
610  if (LoadStoreOp == -1)
611  return false;
612 
613  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
614  if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
615  return true;
616 
617  MachineInstrBuilder NewMI =
618  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
619  .add(*Reg)
620  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
621  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
622  .addImm(Offset)
623  .addImm(0) // glc
624  .addImm(0) // slc
625  .addImm(0) // tfe
626  .addImm(0) // dlc
627  .cloneMemRefs(*MI);
628 
629  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
630  AMDGPU::OpName::vdata_in);
631  if (VDataIn)
632  NewMI.add(*VDataIn);
633  return true;
634 }
635 
636 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
637  unsigned LoadStoreOp,
638  int Index,
639  unsigned ValueReg,
640  bool IsKill,
641  unsigned ScratchRsrcReg,
642  unsigned ScratchOffsetReg,
643  int64_t InstOffset,
644  MachineMemOperand *MMO,
645  RegScavenger *RS) const {
646  MachineBasicBlock *MBB = MI->getParent();
647  MachineFunction *MF = MI->getParent()->getParent();
648  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
649  const SIInstrInfo *TII = ST.getInstrInfo();
650  const MachineFrameInfo &MFI = MF->getFrameInfo();
651 
652  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
653  const DebugLoc &DL = MI->getDebugLoc();
654  bool IsStore = Desc.mayStore();
655 
656  bool Scavenged = false;
657  unsigned SOffset = ScratchOffsetReg;
658 
659  const unsigned EltSize = 4;
660  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
661  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
662  unsigned Size = NumSubRegs * EltSize;
663  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
664  int64_t ScratchOffsetRegDelta = 0;
665 
666  unsigned Align = MFI.getObjectAlignment(Index);
667  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
668 
669  Register TmpReg =
670  hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
671  : Register();
672 
673  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
674 
675  if (!isUInt<12>(Offset + Size - EltSize)) {
676  SOffset = AMDGPU::NoRegister;
677 
678  // We currently only support spilling VGPRs to EltSize boundaries, meaning
679  // we can simplify the adjustment of Offset here to just scale with
680  // WavefrontSize.
681  Offset *= ST.getWavefrontSize();
682 
683  // We don't have access to the register scavenger if this function is called
684  // during PEI::scavengeFrameVirtualRegs().
685  if (RS)
686  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
687 
688  if (SOffset == AMDGPU::NoRegister) {
689  // There are no free SGPRs, and we are in the process of spilling
690  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
691  // on SI/CI, and on VI it remains true until we implement spilling using
692  // scalar stores), we have no way to free up an SGPR. Our solution here
693  // is to add the offset directly to the ScratchOffset register, and then
694  // subtract the offset after the spill to return ScratchOffset to its
695  // original value.
696  SOffset = ScratchOffsetReg;
697  ScratchOffsetRegDelta = Offset;
698  } else {
699  Scavenged = true;
700  }
701 
702  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
703  .addReg(ScratchOffsetReg)
704  .addImm(Offset);
705 
706  Offset = 0;
707  }
708 
709  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
710  unsigned SubReg = NumSubRegs == 1 ?
711  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
712 
713  unsigned SOffsetRegState = 0;
714  unsigned SrcDstRegState = getDefRegState(!IsStore);
715  if (i + 1 == e) {
716  SOffsetRegState |= getKillRegState(Scavenged);
717  // The last implicit use carries the "Kill" flag.
718  SrcDstRegState |= getKillRegState(IsKill);
719  }
720 
721  auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
722 
723  if (!MIB.getInstr()) {
724  unsigned FinalReg = SubReg;
725  if (TmpReg != AMDGPU::NoRegister) {
726  if (IsStore)
727  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
728  .addReg(SubReg, getKillRegState(IsKill));
729  SubReg = TmpReg;
730  }
731 
732  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
733  MachineMemOperand *NewMMO
734  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
735  EltSize, MinAlign(Align, EltSize * i));
736 
737  MIB = BuildMI(*MBB, MI, DL, Desc)
738  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
739  .addReg(ScratchRsrcReg)
740  .addReg(SOffset, SOffsetRegState)
741  .addImm(Offset)
742  .addImm(0) // glc
743  .addImm(0) // slc
744  .addImm(0) // tfe
745  .addImm(0) // dlc
746  .addMemOperand(NewMMO);
747 
748  if (!IsStore && TmpReg != AMDGPU::NoRegister)
749  MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
750  FinalReg)
751  .addReg(TmpReg, RegState::Kill);
752  }
753 
754  if (NumSubRegs > 1)
755  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
756  }
757 
758  if (ScratchOffsetRegDelta != 0) {
759  // Subtract the offset we added to the ScratchOffset register.
760  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
761  .addReg(ScratchOffsetReg)
762  .addImm(ScratchOffsetRegDelta);
763  }
764 }
765 
766 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
767  bool Store) {
768  if (SuperRegSize % 16 == 0) {
769  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
770  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
771  }
772 
773  if (SuperRegSize % 8 == 0) {
774  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
775  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
776  }
777 
778  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
779  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
780 }
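// For example, a 64-byte (512-bit) SGPR tuple divides evenly into 16-byte
// pieces and uses the DWORDX4 scalar buffer opcodes, while a 12-byte (96-bit)
// tuple fails both the 16- and 8-byte checks and falls back to 4-byte DWORD
// pieces. (Sizes here are illustrative.)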
781 
782 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
783  int Index,
784  RegScavenger *RS,
785  bool OnlyToVGPR) const {
786  MachineBasicBlock *MBB = MI->getParent();
787  MachineFunction *MF = MBB->getParent();
788  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
789  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
790 
791  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
792  = MFI->getSGPRToVGPRSpills(Index);
793  bool SpillToVGPR = !VGPRSpills.empty();
794  if (OnlyToVGPR && !SpillToVGPR)
795  return false;
796 
797  MachineRegisterInfo &MRI = MF->getRegInfo();
798  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
799  const SIInstrInfo *TII = ST.getInstrInfo();
800 
801  unsigned SuperReg = MI->getOperand(0).getReg();
802  bool IsKill = MI->getOperand(0).isKill();
803  const DebugLoc &DL = MI->getDebugLoc();
804 
805  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
806 
807  bool SpillToSMEM = spillSGPRToSMEM();
808  if (SpillToSMEM && OnlyToVGPR)
809  return false;
810 
811  Register FrameReg = getFrameRegister(*MF);
812 
813  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
814  SuperReg != MFI->getFrameOffsetReg() &&
815  SuperReg != MFI->getScratchWaveOffsetReg()));
816 
817  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
818 
819  unsigned OffsetReg = AMDGPU::M0;
820  unsigned M0CopyReg = AMDGPU::NoRegister;
821 
822  if (SpillToSMEM) {
823  if (RS->isRegUsed(AMDGPU::M0)) {
824  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
825  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
826  .addReg(AMDGPU::M0);
827  }
828  }
829 
830  unsigned ScalarStoreOp;
831  unsigned EltSize = 4;
832  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
833  if (SpillToSMEM && isSGPRClass(RC)) {
834  // XXX - if private_element_size is larger than 4 it might be useful to be
835  // able to spill wider vmem spills.
836  std::tie(EltSize, ScalarStoreOp) =
837  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
838  }
839 
840  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
841  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
842 
843  // SubReg carries the "Kill" flag when SubReg == SuperReg.
844  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
845  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
846  unsigned SubReg = NumSubRegs == 1 ?
847  SuperReg : getSubReg(SuperReg, SplitParts[i]);
848 
849  if (SpillToSMEM) {
850  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
851 
852  // The allocated memory size is really the wavefront size * the frame
853  // index size. The widest register class is 64 bytes, so a 4-byte scratch
854  // allocation is enough to spill this in a single stack object.
855  //
856  // FIXME: Frame size/offsets are computed earlier than this, so the extra
857  // space is still unnecessarily allocated.
858 
859  unsigned Align = FrameInfo.getObjectAlignment(Index);
860  MachinePointerInfo PtrInfo
861  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
862  MachineMemOperand *MMO
863  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
864  EltSize, MinAlign(Align, EltSize * i));
865 
866  // SMEM instructions only support a single offset, so increment the wave
867  // offset.
868 
869  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
870  if (Offset != 0) {
871  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
872  .addReg(FrameReg)
873  .addImm(Offset);
874  } else {
875  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
876  .addReg(FrameReg);
877  }
878 
879  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
880  .addReg(SubReg, getKillRegState(IsKill)) // sdata
881  .addReg(MFI->getScratchRSrcReg()) // sbase
882  .addReg(OffsetReg, RegState::Kill) // soff
883  .addImm(0) // glc
884  .addImm(0) // dlc
885  .addMemOperand(MMO);
886 
887  continue;
888  }
889 
890  if (SpillToVGPR) {
891  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
892 
893  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
894  // only circumstance in which we say it is undefined is when it is the
895  // first spill to this VGPR in the first basic block.
896  bool VGPRDefined = true;
897  if (MBB == &MF->front())
898  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
899 
900  // Mark the "old value of vgpr" input undef only if this is the first sgpr
901  // spill to this specific vgpr in the first basic block.
902  BuildMI(*MBB, MI, DL,
903  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
904  Spill.VGPR)
905  .addReg(SubReg, getKillRegState(IsKill))
906  .addImm(Spill.Lane)
907  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
908 
909  // FIXME: Since this spills to another register instead of an actual
910  // frame index, we should delete the frame index when all references to
911  // it are fixed.
912  } else {
913  // XXX - Can the spill to VGPR fail for some subregisters but not others?
914  if (OnlyToVGPR)
915  return false;
916 
917  // Spill SGPR to a frame index.
918  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
919  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
922  MachineInstrBuilder Mov
923  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
924  .addReg(SubReg, SubKillState);
925 
926 
927  // There could be undef components of a spilled super register.
928  // TODO: Can we detect this and skip the spill?
929  if (NumSubRegs > 1) {
930  // The last implicit use of the SuperReg carries the "Kill" flag.
931  unsigned SuperKillState = 0;
932  if (i + 1 == e)
933  SuperKillState |= getKillRegState(IsKill);
934  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
935  }
936 
937  unsigned Align = FrameInfo.getObjectAlignment(Index);
938  MachinePointerInfo PtrInfo
939  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
940  MachineMemOperand *MMO
941  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
942  EltSize, MinAlign(Align, EltSize * i));
943  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
944  .addReg(TmpReg, RegState::Kill) // src
945  .addFrameIndex(Index) // vaddr
946  .addReg(MFI->getScratchRSrcReg()) // srsrc
947  .addReg(MFI->getStackPtrOffsetReg()) // soffset
948  .addImm(i * 4) // offset
949  .addMemOperand(MMO);
950  }
951  }
952 
953  if (M0CopyReg != AMDGPU::NoRegister) {
954  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
955  .addReg(M0CopyReg, RegState::Kill);
956  }
957 
958  MI->eraseFromParent();
959  MFI->addToSpilledSGPRs(NumSubRegs);
960  return true;
961 }
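// A worked example of the SMEM offset computation in the loop above, with
// assumed values (wavefront size 64, frame object offset 16, EltSize 4,
// subregister index i = 1):
//
//   Offset = 64 * 16 + 4 * 1 = 1028
//
// i.e. m0 is set to FrameReg + 1028 before the scalar store of that piece.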
962 
963 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
964  int Index,
965  RegScavenger *RS,
966  bool OnlyToVGPR) const {
967  MachineFunction *MF = MI->getParent()->getParent();
968  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
969  MachineBasicBlock *MBB = MI->getParent();
970  MachineRegisterInfo &MRI = MF->getRegInfo();
971 
972  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
973  = MFI->getSGPRToVGPRSpills(Index);
974  bool SpillToVGPR = !VGPRSpills.empty();
975  if (OnlyToVGPR && !SpillToVGPR)
976  return false;
977 
978  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
979  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
980  const SIInstrInfo *TII = ST.getInstrInfo();
981  const DebugLoc &DL = MI->getDebugLoc();
982 
983  unsigned SuperReg = MI->getOperand(0).getReg();
984  bool SpillToSMEM = spillSGPRToSMEM();
985  if (SpillToSMEM && OnlyToVGPR)
986  return false;
987 
988  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
989 
990  unsigned OffsetReg = AMDGPU::M0;
991  unsigned M0CopyReg = AMDGPU::NoRegister;
992 
993  if (SpillToSMEM) {
994  if (RS->isRegUsed(AMDGPU::M0)) {
995  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
996  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
997  .addReg(AMDGPU::M0);
998  }
999  }
1000 
1001  unsigned EltSize = 4;
1002  unsigned ScalarLoadOp;
1003 
1004  Register FrameReg = getFrameRegister(*MF);
1005 
1006  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
1007  if (SpillToSMEM && isSGPRClass(RC)) {
1008  // XXX - if private_element_size is larger than 4 it might be useful to be
1009  // able to spill wider vmem spills.
1010  std::tie(EltSize, ScalarLoadOp) =
1011  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
1012  }
1013 
1014  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
1015  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
1016 
1017  // SubReg carries the "Kill" flag when SubReg == SuperReg.
1018  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
1019 
1020  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
1021  unsigned SubReg = NumSubRegs == 1 ?
1022  SuperReg : getSubReg(SuperReg, SplitParts[i]);
1023 
1024  if (SpillToSMEM) {
1025  // FIXME: Size may be > 4 but extra bytes wasted.
1026  unsigned Align = FrameInfo.getObjectAlignment(Index);
1027  MachinePointerInfo PtrInfo
1028  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1029  MachineMemOperand *MMO
1030  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
1031  EltSize, MinAlign(Align, EltSize * i));
1032 
1033  // Add i * 4 offset
1034  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
1035  if (Offset != 0) {
1036  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
1037  .addReg(FrameReg)
1038  .addImm(Offset);
1039  } else {
1040  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
1041  .addReg(FrameReg);
1042  }
1043 
1044  auto MIB =
1045  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
1046  .addReg(MFI->getScratchRSrcReg()) // sbase
1047  .addReg(OffsetReg, RegState::Kill) // soff
1048  .addImm(0) // glc
1049  .addImm(0) // dlc
1050  .addMemOperand(MMO);
1051 
1052  if (NumSubRegs > 1 && i == 0)
1053  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1054 
1055  continue;
1056  }
1057 
1058  if (SpillToVGPR) {
1059  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1060  auto MIB =
1061  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
1062  SubReg)
1063  .addReg(Spill.VGPR)
1064  .addImm(Spill.Lane);
1065 
1066  if (NumSubRegs > 1 && i == 0)
1067  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1068  } else {
1069  if (OnlyToVGPR)
1070  return false;
1071 
1072  // Restore SGPR from a stack slot.
1073  // FIXME: We should use S_LOAD_DWORD here for VI.
1074  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1075  unsigned Align = FrameInfo.getObjectAlignment(Index);
1076 
1077  MachinePointerInfo PtrInfo
1078  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1079 
1080  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
1081  MachineMemOperand::MOLoad, EltSize,
1082  MinAlign(Align, EltSize * i));
1083 
1084  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
1085  .addFrameIndex(Index) // vaddr
1086  .addReg(MFI->getScratchRSrcReg()) // srsrc
1087  .addReg(MFI->getStackPtrOffsetReg()) // soffset
1088  .addImm(i * 4) // offset
1089  .addMemOperand(MMO);
1090 
1091  auto MIB =
1092  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
1093  .addReg(TmpReg, RegState::Kill);
1094 
1095  if (NumSubRegs > 1)
1096  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
1097  }
1098  }
1099 
1100  if (M0CopyReg != AMDGPU::NoRegister) {
1101  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
1102  .addReg(M0CopyReg, RegState::Kill);
1103  }
1104 
1105  MI->eraseFromParent();
1106  return true;
1107 }
1108 
1109 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1110 /// a VGPR and the stack slot can be safely eliminated when all other users are
1111 /// handled.
1112 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1113  MachineBasicBlock::iterator MI,
1114  int FI,
1115  RegScavenger *RS) const {
1116  switch (MI->getOpcode()) {
1117  case AMDGPU::SI_SPILL_S1024_SAVE:
1118  case AMDGPU::SI_SPILL_S512_SAVE:
1119  case AMDGPU::SI_SPILL_S256_SAVE:
1120  case AMDGPU::SI_SPILL_S160_SAVE:
1121  case AMDGPU::SI_SPILL_S128_SAVE:
1122  case AMDGPU::SI_SPILL_S96_SAVE:
1123  case AMDGPU::SI_SPILL_S64_SAVE:
1124  case AMDGPU::SI_SPILL_S32_SAVE:
1125  return spillSGPR(MI, FI, RS, true);
1126  case AMDGPU::SI_SPILL_S1024_RESTORE:
1127  case AMDGPU::SI_SPILL_S512_RESTORE:
1128  case AMDGPU::SI_SPILL_S256_RESTORE:
1129  case AMDGPU::SI_SPILL_S160_RESTORE:
1130  case AMDGPU::SI_SPILL_S128_RESTORE:
1131  case AMDGPU::SI_SPILL_S96_RESTORE:
1132  case AMDGPU::SI_SPILL_S64_RESTORE:
1133  case AMDGPU::SI_SPILL_S32_RESTORE:
1134  return restoreSGPR(MI, FI, RS, true);
1135  default:
1136  llvm_unreachable("not an SGPR spill instruction");
1137  }
1138 }
1139 
1140 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1141  int SPAdj, unsigned FIOperandNum,
1142  RegScavenger *RS) const {
1143  MachineFunction *MF = MI->getParent()->getParent();
1144  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1145  MachineBasicBlock *MBB = MI->getParent();
1146  MachineRegisterInfo &MRI = MF->getRegInfo();
1147  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1148  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1149  const SIInstrInfo *TII = ST.getInstrInfo();
1150  DebugLoc DL = MI->getDebugLoc();
1151 
1152  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1153 
1154  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1155  int Index = MI->getOperand(FIOperandNum).getIndex();
1156 
1157  Register FrameReg = getFrameRegister(*MF);
1158 
1159  switch (MI->getOpcode()) {
1160  // SGPR register spill
1161  case AMDGPU::SI_SPILL_S1024_SAVE:
1162  case AMDGPU::SI_SPILL_S512_SAVE:
1163  case AMDGPU::SI_SPILL_S256_SAVE:
1164  case AMDGPU::SI_SPILL_S160_SAVE:
1165  case AMDGPU::SI_SPILL_S128_SAVE:
1166  case AMDGPU::SI_SPILL_S96_SAVE:
1167  case AMDGPU::SI_SPILL_S64_SAVE:
1168  case AMDGPU::SI_SPILL_S32_SAVE: {
1169  spillSGPR(MI, Index, RS);
1170  break;
1171  }
1172 
1173  // SGPR register restore
1174  case AMDGPU::SI_SPILL_S1024_RESTORE:
1175  case AMDGPU::SI_SPILL_S512_RESTORE:
1176  case AMDGPU::SI_SPILL_S256_RESTORE:
1177  case AMDGPU::SI_SPILL_S160_RESTORE:
1178  case AMDGPU::SI_SPILL_S128_RESTORE:
1179  case AMDGPU::SI_SPILL_S96_RESTORE:
1180  case AMDGPU::SI_SPILL_S64_RESTORE:
1181  case AMDGPU::SI_SPILL_S32_RESTORE: {
1182  restoreSGPR(MI, Index, RS);
1183  break;
1184  }
1185 
1186  // VGPR register spill
1187  case AMDGPU::SI_SPILL_V1024_SAVE:
1188  case AMDGPU::SI_SPILL_V512_SAVE:
1189  case AMDGPU::SI_SPILL_V256_SAVE:
1190  case AMDGPU::SI_SPILL_V160_SAVE:
1191  case AMDGPU::SI_SPILL_V128_SAVE:
1192  case AMDGPU::SI_SPILL_V96_SAVE:
1193  case AMDGPU::SI_SPILL_V64_SAVE:
1194  case AMDGPU::SI_SPILL_V32_SAVE:
1195  case AMDGPU::SI_SPILL_A1024_SAVE:
1196  case AMDGPU::SI_SPILL_A512_SAVE:
1197  case AMDGPU::SI_SPILL_A128_SAVE:
1198  case AMDGPU::SI_SPILL_A64_SAVE:
1199  case AMDGPU::SI_SPILL_A32_SAVE: {
1200  const MachineOperand *VData = TII->getNamedOperand(*MI,
1201  AMDGPU::OpName::vdata);
1202  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1203  MFI->getStackPtrOffsetReg());
1204 
1205  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1206  Index,
1207  VData->getReg(), VData->isKill(),
1208  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1209  FrameReg,
1210  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1211  *MI->memoperands_begin(),
1212  RS);
1213  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1214  MI->eraseFromParent();
1215  break;
1216  }
1217  case AMDGPU::SI_SPILL_V32_RESTORE:
1218  case AMDGPU::SI_SPILL_V64_RESTORE:
1219  case AMDGPU::SI_SPILL_V96_RESTORE:
1220  case AMDGPU::SI_SPILL_V128_RESTORE:
1221  case AMDGPU::SI_SPILL_V160_RESTORE:
1222  case AMDGPU::SI_SPILL_V256_RESTORE:
1223  case AMDGPU::SI_SPILL_V512_RESTORE:
1224  case AMDGPU::SI_SPILL_V1024_RESTORE:
1225  case AMDGPU::SI_SPILL_A32_RESTORE:
1226  case AMDGPU::SI_SPILL_A64_RESTORE:
1227  case AMDGPU::SI_SPILL_A128_RESTORE:
1228  case AMDGPU::SI_SPILL_A512_RESTORE:
1229  case AMDGPU::SI_SPILL_A1024_RESTORE: {
1230  const MachineOperand *VData = TII->getNamedOperand(*MI,
1231  AMDGPU::OpName::vdata);
1232  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1233  MFI->getStackPtrOffsetReg());
1234 
1235  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1236  Index,
1237  VData->getReg(), VData->isKill(),
1238  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1239  FrameReg,
1240  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1241  *MI->memoperands_begin(),
1242  RS);
1243  MI->eraseFromParent();
1244  break;
1245  }
1246 
1247  default: {
1248  const DebugLoc &DL = MI->getDebugLoc();
1249  bool IsMUBUF = TII->isMUBUF(*MI);
1250 
1251  if (!IsMUBUF && !MFI->isEntryFunction()) {
1252  // Convert to an absolute stack address by finding the offset from the
1253  // scratch wave base and scaling by the wave size.
1254  //
1255  // In an entry function/kernel the offset is already the absolute
1256  // address relative to the frame register.
1257 
1258  unsigned DiffReg
1259  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1260 
1261  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1262  Register ResultReg = IsCopy ?
1263  MI->getOperand(0).getReg() :
1264  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1265 
1266  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1267  .addReg(FrameReg)
1268  .addReg(MFI->getScratchWaveOffsetReg());
1269 
1270  int64_t Offset = FrameInfo.getObjectOffset(Index);
1271  if (Offset == 0) {
1272  // XXX - This never happens because of emergency scavenging slot at 0?
1273  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1274  .addImm(Log2_32(ST.getWavefrontSize()))
1275  .addReg(DiffReg);
1276  } else {
1277  unsigned ScaledReg
1278  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1279 
1280  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1281  .addImm(Log2_32(ST.getWavefrontSize()))
1282  .addReg(DiffReg, RegState::Kill);
1283 
1284  // TODO: Fold if use instruction is another add of a constant.
1285  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1286  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1287  .addImm(Offset)
1288  .addReg(ScaledReg, RegState::Kill)
1289  .addImm(0); // clamp bit
1290  } else {
1291  unsigned ConstOffsetReg
1292  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1293 
1294  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1295  .addImm(Offset);
1296  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1297  .addReg(ConstOffsetReg, RegState::Kill)
1298  .addReg(ScaledReg, RegState::Kill)
1299  .addImm(0); // clamp bit
1300  }
1301  }
1302 
1303  // Don't introduce an extra copy if we're just materializing in a mov.
1304  if (IsCopy)
1305  MI->eraseFromParent();
1306  else
1307  FIOp.ChangeToRegister(ResultReg, false, false, true);
1308  return;
1309  }
1310 
1311  if (IsMUBUF) {
1312  // Disable offen so we don't need a 0 vgpr base.
1313  assert(static_cast<int>(FIOperandNum) ==
1314  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1315  AMDGPU::OpName::vaddr));
1316 
1317  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1318  MFI->getStackPtrOffsetReg());
1319 
1320  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
1321 
1322  int64_t Offset = FrameInfo.getObjectOffset(Index);
1323  int64_t OldImm
1324  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1325  int64_t NewOffset = OldImm + Offset;
1326 
1327  if (isUInt<12>(NewOffset) &&
1328  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1329  MI->eraseFromParent();
1330  return;
1331  }
1332  }
1333 
1334  // If the offset is simply too big, don't convert to a scratch wave offset
1335  // relative index.
1336 
1337  int64_t Offset = FrameInfo.getObjectOffset(Index);
1338  FIOp.ChangeToImmediate(Offset);
1339  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1340  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1341  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1342  .addImm(Offset);
1343  FIOp.ChangeToRegister(TmpReg, false, false, true);
1344  }
1345  }
1346  }
1347 }
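// A sketch of the address conversion done in the default (non-MUBUF) case
// above, using assumed values (wavefront size 64, frame register 0x1000
// bytes past the scratch wave base, frame object offset 16):
//
//   Diff   = FrameReg - ScratchWaveOffset  = 0x1000
//   Scaled = Diff >> Log2_32(64)           = 0x40   per-lane byte offset
//   Result = Scaled + 16                   = 0x50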
1348 
1349 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1350  const TargetRegisterClass *RC = getPhysRegClass(Reg);
1351  unsigned Size = getRegSizeInBits(*RC);
1352  unsigned AltName = AMDGPU::NoRegAltName;
1353 
1354  switch (Size) {
1355  case 32: AltName = AMDGPU::Reg32; break;
1356  case 64: AltName = AMDGPU::Reg64; break;
1357  case 96: AltName = AMDGPU::Reg96; break;
1358  case 128: AltName = AMDGPU::Reg128; break;
1359  case 160: AltName = AMDGPU::Reg160; break;
1360  case 256: AltName = AMDGPU::Reg256; break;
1361  case 512: AltName = AMDGPU::Reg512; break;
1362  case 1024: AltName = AMDGPU::Reg1024; break;
1363  }
1364  return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
1365 }
1366 
1367 // FIXME: This is very slow. It might be worth creating a map from physreg to
1368 // register class.
1369 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1370  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
1371 
1372  static const TargetRegisterClass *const BaseClasses[] = {
1373  &AMDGPU::VGPR_32RegClass,
1374  &AMDGPU::SReg_32RegClass,
1375  &AMDGPU::AGPR_32RegClass,
1376  &AMDGPU::VReg_64RegClass,
1377  &AMDGPU::SReg_64RegClass,
1378  &AMDGPU::AReg_64RegClass,
1379  &AMDGPU::VReg_96RegClass,
1380  &AMDGPU::SReg_96RegClass,
1381  &AMDGPU::VReg_128RegClass,
1382  &AMDGPU::SReg_128RegClass,
1383  &AMDGPU::AReg_128RegClass,
1384  &AMDGPU::VReg_160RegClass,
1385  &AMDGPU::SReg_160RegClass,
1386  &AMDGPU::VReg_256RegClass,
1387  &AMDGPU::SReg_256RegClass,
1388  &AMDGPU::VReg_512RegClass,
1389  &AMDGPU::SReg_512RegClass,
1390  &AMDGPU::AReg_512RegClass,
1391  &AMDGPU::SReg_1024RegClass,
1392  &AMDGPU::VReg_1024RegClass,
1393  &AMDGPU::AReg_1024RegClass,
1394  &AMDGPU::SCC_CLASSRegClass,
1395  &AMDGPU::Pseudo_SReg_32RegClass,
1396  &AMDGPU::Pseudo_SReg_128RegClass,
1397  };
1398 
1399  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1400  if (BaseClass->contains(Reg)) {
1401  return BaseClass;
1402  }
1403  }
1404  return nullptr;
1405 }
1406 
1407 // TODO: It might be helpful to have some target specific flags in
1408 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1409 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1410  unsigned Size = getRegSizeInBits(*RC);
1411  if (Size < 32)
1412  return false;
1413  switch (Size) {
1414  case 32:
1415  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1416  case 64:
1417  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1418  case 96:
1419  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1420  case 128:
1421  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1422  case 160:
1423  return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1424  case 256:
1425  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1426  case 512:
1427  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1428  case 1024:
1429  return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
1430  default:
1431  llvm_unreachable("Invalid register class size");
1432  }
1433 }
1434 
1435 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
1436  unsigned Size = getRegSizeInBits(*RC);
1437  if (Size < 32)
1438  return false;
1439  switch (Size) {
1440  case 32:
1441  return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
1442  case 64:
1443  return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
1444  case 96:
1445  return false;
1446  case 128:
1447  return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
1448  case 160:
1449  case 256:
1450  return false;
1451  case 512:
1452  return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
1453  case 1024:
1454  return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
1455  default:
1456  llvm_unreachable("Invalid register class size");
1457  }
1458 }
1459 
1460 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1461  const TargetRegisterClass *SRC) const {
1462  switch (getRegSizeInBits(*SRC)) {
1463  case 32:
1464  return &AMDGPU::VGPR_32RegClass;
1465  case 64:
1466  return &AMDGPU::VReg_64RegClass;
1467  case 96:
1468  return &AMDGPU::VReg_96RegClass;
1469  case 128:
1470  return &AMDGPU::VReg_128RegClass;
1471  case 160:
1472  return &AMDGPU::VReg_160RegClass;
1473  case 256:
1474  return &AMDGPU::VReg_256RegClass;
1475  case 512:
1476  return &AMDGPU::VReg_512RegClass;
1477  case 1024:
1478  return &AMDGPU::VReg_1024RegClass;
1479  default:
1480  llvm_unreachable("Invalid register class size");
1481  }
1482 }
1483 
1484 const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
1485  const TargetRegisterClass *SRC) const {
1486  switch (getRegSizeInBits(*SRC)) {
1487  case 32:
1488  return &AMDGPU::AGPR_32RegClass;
1489  case 64:
1490  return &AMDGPU::AReg_64RegClass;
1491  case 128:
1492  return &AMDGPU::AReg_128RegClass;
1493  case 512:
1494  return &AMDGPU::AReg_512RegClass;
1495  case 1024:
1496  return &AMDGPU::AReg_1024RegClass;
1497  default:
1498  llvm_unreachable("Invalid register class size");
1499  }
1500 }
1501 
1502 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1503  const TargetRegisterClass *VRC) const {
1504  switch (getRegSizeInBits(*VRC)) {
1505  case 32:
1506  return &AMDGPU::SGPR_32RegClass;
1507  case 64:
1508  return &AMDGPU::SReg_64RegClass;
1509  case 96:
1510  return &AMDGPU::SReg_96RegClass;
1511  case 128:
1512  return &AMDGPU::SReg_128RegClass;
1513  case 160:
1514  return &AMDGPU::SReg_160RegClass;
1515  case 256:
1516  return &AMDGPU::SReg_256RegClass;
1517  case 512:
1518  return &AMDGPU::SReg_512RegClass;
1519  case 1024:
1520  return &AMDGPU::SReg_1024RegClass;
1521  default:
1522  llvm_unreachable("Invalid register class size");
1523  }
1524 }
1525 
1526 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1527  const TargetRegisterClass *RC, unsigned SubIdx) const {
1528  if (SubIdx == AMDGPU::NoSubRegister)
1529  return RC;
1530 
1531  // We can assume that each lane corresponds to one 32-bit register.
1532  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1533  if (isSGPRClass(RC)) {
1534  switch (Count) {
1535  case 1:
1536  return &AMDGPU::SGPR_32RegClass;
1537  case 2:
1538  return &AMDGPU::SReg_64RegClass;
1539  case 3:
1540  return &AMDGPU::SReg_96RegClass;
1541  case 4:
1542  return &AMDGPU::SReg_128RegClass;
1543  case 5:
1544  return &AMDGPU::SReg_160RegClass;
1545  case 8:
1546  return &AMDGPU::SReg_256RegClass;
1547  case 16:
1548  return &AMDGPU::SReg_512RegClass;
1549  case 32: /* fall-through */
1550  default:
1551  llvm_unreachable("Invalid sub-register class size");
1552  }
1553  } else if (hasAGPRs(RC)) {
1554  switch (Count) {
1555  case 1:
1556  return &AMDGPU::AGPR_32RegClass;
1557  case 2:
1558  return &AMDGPU::AReg_64RegClass;
1559  case 4:
1560  return &AMDGPU::AReg_128RegClass;
1561  case 16:
1562  return &AMDGPU::AReg_512RegClass;
1563  case 32: /* fall-through */
1564  default:
1565  llvm_unreachable("Invalid sub-register class size");
1566  }
1567  } else {
1568  switch (Count) {
1569  case 1:
1570  return &AMDGPU::VGPR_32RegClass;
1571  case 2:
1572  return &AMDGPU::VReg_64RegClass;
1573  case 3:
1574  return &AMDGPU::VReg_96RegClass;
1575  case 4:
1576  return &AMDGPU::VReg_128RegClass;
1577  case 5:
1578  return &AMDGPU::VReg_160RegClass;
1579  case 8:
1580  return &AMDGPU::VReg_256RegClass;
1581  case 16:
1582  return &AMDGPU::VReg_512RegClass;
1583  case 32: /* fall-through */
1584  default:
1585  llvm_unreachable("Invalid sub-register class size");
1586  }
1587  }
1588 }
1589 
1590 bool SIRegisterInfo::shouldRewriteCopySrc(
1591  const TargetRegisterClass *DefRC,
1592  unsigned DefSubReg,
1593  const TargetRegisterClass *SrcRC,
1594  unsigned SrcSubReg) const {
1595  // We want to prefer the smallest register class possible, so we don't want to
1596  // stop and rewrite on anything that looks like a subregister
1597  // extract. Operations mostly don't care about the super register class, so we
1598  // only want to stop on the most basic of copies between the same register
1599  // class.
1600  //
1601  // e.g. if we have something like
1602  // %0 = ...
1603  // %1 = ...
1604  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1605  // %3 = COPY %2, sub0
1606  //
1607  // We want to look through the COPY to find:
1608  // => %3 = COPY %0
1609 
1610  // Plain copy.
1611  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1612 }
1613 
1614 /// Returns a register that is not used at any point in the function.
1615 /// If all registers are used, then this function will return
1616 /// AMDGPU::NoRegister.
1617 unsigned
1618 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1619  const TargetRegisterClass *RC,
1620  const MachineFunction &MF) const {
1621 
1622  for (unsigned Reg : *RC)
1623  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1624  return Reg;
1625  return AMDGPU::NoRegister;
1626 }
1627 
1628 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1629  unsigned EltSize) const {
1630  if (EltSize == 4) {
1631  static const int16_t Sub0_31[] = {
1632  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1633  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1634  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1635  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1636  AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
1637  AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
1638  AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
1639  AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
1640  };
1641 
1642  static const int16_t Sub0_15[] = {
1643  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1644  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1645  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1646  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1647  };
1648 
1649  static const int16_t Sub0_7[] = {
1650  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1651  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1652  };
1653 
1654  static const int16_t Sub0_4[] = {
1655  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1656  };
1657 
1658  static const int16_t Sub0_3[] = {
1659  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1660  };
1661 
1662  static const int16_t Sub0_2[] = {
1663  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1664  };
1665 
1666  static const int16_t Sub0_1[] = {
1667  AMDGPU::sub0, AMDGPU::sub1,
1668  };
1669 
1670  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1671  case 32:
1672  return {};
1673  case 64:
1674  return makeArrayRef(Sub0_1);
1675  case 96:
1676  return makeArrayRef(Sub0_2);
1677  case 128:
1678  return makeArrayRef(Sub0_3);
1679  case 160:
1680  return makeArrayRef(Sub0_4);
1681  case 256:
1682  return makeArrayRef(Sub0_7);
1683  case 512:
1684  return makeArrayRef(Sub0_15);
1685  case 1024:
1686  return makeArrayRef(Sub0_31);
1687  default:
1688  llvm_unreachable("unhandled register size");
1689  }
1690  }
1691 
1692  if (EltSize == 8) {
1693  static const int16_t Sub0_31_64[] = {
1694  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1695  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1696  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1697  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1698  AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
1699  AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
1700  AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
1701  AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
1702  };
1703 
1704  static const int16_t Sub0_15_64[] = {
1705  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1706  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1707  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1708  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1709  };
1710 
1711  static const int16_t Sub0_7_64[] = {
1712  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1713  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1714  };
1715 
1716 
1717  static const int16_t Sub0_3_64[] = {
1718  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1719  };
1720 
1721  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1722  case 64:
1723  return {};
1724  case 128:
1725  return makeArrayRef(Sub0_3_64);
1726  case 256:
1727  return makeArrayRef(Sub0_7_64);
1728  case 512:
1729  return makeArrayRef(Sub0_15_64);
1730  case 1024:
1731  return makeArrayRef(Sub0_31_64);
1732  default:
1733  llvm_unreachable("unhandled register size");
1734  }
1735  }
1736 
1737  if (EltSize == 16) {
1738 
1739  static const int16_t Sub0_31_128[] = {
1740  AMDGPU::sub0_sub1_sub2_sub3,
1741  AMDGPU::sub4_sub5_sub6_sub7,
1742  AMDGPU::sub8_sub9_sub10_sub11,
1743  AMDGPU::sub12_sub13_sub14_sub15,
1744  AMDGPU::sub16_sub17_sub18_sub19,
1745  AMDGPU::sub20_sub21_sub22_sub23,
1746  AMDGPU::sub24_sub25_sub26_sub27,
1747  AMDGPU::sub28_sub29_sub30_sub31
1748  };
1749 
1750  static const int16_t Sub0_15_128[] = {
1751  AMDGPU::sub0_sub1_sub2_sub3,
1752  AMDGPU::sub4_sub5_sub6_sub7,
1753  AMDGPU::sub8_sub9_sub10_sub11,
1754  AMDGPU::sub12_sub13_sub14_sub15
1755  };
1756 
1757  static const int16_t Sub0_7_128[] = {
1758  AMDGPU::sub0_sub1_sub2_sub3,
1759  AMDGPU::sub4_sub5_sub6_sub7
1760  };
1761 
1762  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1763  case 128:
1764  return {};
1765  case 256:
1766  return makeArrayRef(Sub0_7_128);
1767  case 512:
1768  return makeArrayRef(Sub0_15_128);
1769  case 1024:
1770  return makeArrayRef(Sub0_31_128);
1771  default:
1772  llvm_unreachable("unhandled register size");
1773  }
1774  }
1775 
1776  assert(EltSize == 32 && "unhandled elt size");
1777 
1778  static const int16_t Sub0_31_256[] = {
1779  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1780  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1781  AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1782  AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1783  };
1784 
1785  static const int16_t Sub0_15_256[] = {
1786  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1787  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1788  };
1789 
1790  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1791  case 256:
1792  return {};
1793  case 512:
1794  return makeArrayRef(Sub0_15_256);
1795  case 1024:
1796  return makeArrayRef(Sub0_31_256);
1797  default:
1798  llvm_unreachable("unhandled register size");
1799  }
1800 }
1801 
1802 const TargetRegisterClass*
1803 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1804  unsigned Reg) const {
1805  if (TargetRegisterInfo::isVirtualRegister(Reg))
1806  return MRI.getRegClass(Reg);
1807 
1808  return getPhysRegClass(Reg);
1809 }
1810 
1811 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1812  unsigned Reg) const {
1813  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1814  assert(RC && "Register class for the reg not found");
1815  return hasVGPRs(RC);
1816 }
1817 
1818 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
1819  unsigned Reg) const {
1820  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1821  assert(RC && "Register class for the reg not found");
1822  return hasAGPRs(RC);
1823 }
1824 
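/// Coalescing is allowed whenever either side is at most a dword, or when the
/// combined register class would be no larger than the bigger of the two
/// inputs.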
1825 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1826  const TargetRegisterClass *SrcRC,
1827  unsigned SubReg,
1828  const TargetRegisterClass *DstRC,
1829  unsigned DstSubReg,
1830  const TargetRegisterClass *NewRC,
1831  LiveIntervals &LIS) const {
1832  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1833  unsigned DstSize = getRegSizeInBits(*DstRC);
1834  unsigned NewSize = getRegSizeInBits(*NewRC);
1835 
1836  // Do not increase the size of registers beyond a dword; we would need to
1837  // allocate adjacent registers and constrain regalloc more than needed.
1838 
1839  // Always allow dword coalescing.
1840  if (SrcSize <= 32 || DstSize <= 32)
1841  return true;
1842 
1843  return NewSize <= DstSize || NewSize <= SrcSize;
1844 }
1845 
1846 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1847  MachineFunction &MF) const {
1848 
1849  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1850  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1851 
1852  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1853  MF.getFunction());
1854  switch (RC->getID()) {
1855  default:
1856  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1857  case AMDGPU::VGPR_32RegClassID:
1858  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1859  case AMDGPU::SGPR_32RegClassID:
1860  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1861  }
1862 }
1863 
1864 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1865  unsigned Idx) const {
1866  if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
1867  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1868  const_cast<MachineFunction &>(MF));
1869 
1870  if (Idx == getSGPRPressureSet())
1871  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1872  const_cast<MachineFunction &>(MF));
1873 
1874  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1875 }
1876 
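/// Report no pressure sets for the register unit backing M0 so it never
/// influences pressure tracking; defer to the generated tables otherwise.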
1877 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1878  static const int Empty[] = { -1 };
1879 
1880  if (hasRegUnit(AMDGPU::M0, RegUnit))
1881  return Empty;
1882  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1883 }
1884 
1885 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1886  // Not a callee saved register.
1887  return AMDGPU::SGPR30_SGPR31;
1888 }
1889 
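/// Pick a register class of at least \p Size bits for the given register
/// bank: VGPR classes for the VGPR bank, otherwise scalar (and, for 1-bit
/// values, wave-size dependent) classes; returns nullptr for unsupported
/// sizes.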
1890 const TargetRegisterClass *
1891 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
1892  const RegisterBank &RB,
1893  const MachineRegisterInfo &MRI) const {
1894  switch (Size) {
1895  case 1: {
1896  switch (RB.getID()) {
1897  case AMDGPU::VGPRRegBankID:
1898  return &AMDGPU::VGPR_32RegClass;
1899  case AMDGPU::VCCRegBankID:
1900  return isWave32 ?
1901  &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
1902  case AMDGPU::SGPRRegBankID:
1903  return &AMDGPU::SReg_32_XM0RegClass;
1904  case AMDGPU::SCCRegBankID:
1905  // This needs to return an allocatable class, so don't bother returning
1906  // the dummy SCC class.
1907  return &AMDGPU::SReg_32_XM0RegClass;
1908  default:
1909  llvm_unreachable("unknown register bank");
1910  }
1911  }
1912  case 32:
1913  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1914  &AMDGPU::SReg_32_XM0RegClass;
1915  case 64:
1916  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1917  &AMDGPU::SReg_64_XEXECRegClass;
1918  case 96:
1919  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1920  &AMDGPU::SReg_96RegClass;
1921  case 128:
1922  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1923  &AMDGPU::SReg_128RegClass;
1924  case 160:
1925  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1926  &AMDGPU::SReg_160RegClass;
1927  case 256:
1928  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1929  &AMDGPU::SReg_256RegClass;
1930  case 512:
1931  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1932  &AMDGPU::SReg_512RegClass;
1933  default:
1934  if (Size < 32)
1935  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1936  &AMDGPU::SReg_32_XM0RegClass;
1937  return nullptr;
1938  }
1939 }
1940 
1941 const TargetRegisterClass *
1942 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1943  const MachineRegisterInfo &MRI) const {
1944  if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()))
1945  return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
1946  return nullptr;
1947 }
1948 
1949 unsigned SIRegisterInfo::getVCC() const {
1950  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
1951 }
1952 
1953 const TargetRegisterClass *
1954 SIRegisterInfo::getRegClass(unsigned RCID) const {
1955  switch ((int)RCID) {
1956  case AMDGPU::SReg_1RegClassID:
1957  return getBoolRC();
1958  case AMDGPU::SReg_1_XEXECRegClassID:
1959  return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
1960  : &AMDGPU::SReg_64_XEXECRegClass;
1961  case -1:
1962  return nullptr;
1963  default:
1964  return AMDGPURegisterInfo::getRegClass(RCID);
1965  }
1966 }
1967 
1968 // Find reaching register definition
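// For virtual registers this walks the live interval (honoring subregister
// lanes); for physical registers it scans each register unit and keeps the
// latest def in dominator order. Returns nullptr when no such def dominates
// the use.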
1969 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1970  MachineInstr &Use,
1971  MachineRegisterInfo &MRI,
1972  LiveIntervals *LIS) const {
1973  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1974  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1975  SlotIndex DefIdx;
1976 
1977  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1978  if (!LIS->hasInterval(Reg))
1979  return nullptr;
1980  LiveInterval &LI = LIS->getInterval(Reg);
1981  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1982  : MRI.getMaxLaneMaskForVReg(Reg);
1983  VNInfo *V = nullptr;
1984  if (LI.hasSubRanges()) {
1985  for (auto &S : LI.subranges()) {
1986  if ((S.LaneMask & SubLanes) == SubLanes) {
1987  V = S.getVNInfoAt(UseIdx);
1988  break;
1989  }
1990  }
1991  } else {
1992  V = LI.getVNInfoAt(UseIdx);
1993  }
1994  if (!V)
1995  return nullptr;
1996  DefIdx = V->def;
1997  } else {
1998  // Find last def.
1999  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
2000  LiveRange &LR = LIS->getRegUnit(*Units);
2001  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
2002  if (!DefIdx.isValid() ||
2003  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
2004  LIS->getInstructionFromIndex(V->def)))
2005  DefIdx = V->def;
2006  } else {
2007  return nullptr;
2008  }
2009  }
2010  }
2011 
2012  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2013 
2014  if (!Def || !MDT.dominates(Def, &Use))
2015  return nullptr;
2016 
2017  assert(Def->modifiesRegister(Reg, this));
2018 
2019  return Def;
2020 }