SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "llvm/CodeGen/LiveIntervals.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/RegisterScavenging.h"
25 #include "llvm/CodeGen/SlotIndexes.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/LLVMContext.h"
28 
29 using namespace llvm;
30 
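// Returns true if the -1-terminated pressure set list PSets (as produced by
// getRegUnitPressureSets) contains PSetID.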
31 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
32  for (unsigned i = 0; PSets[i] != -1; ++i) {
33  if (PSets[i] == (int)PSetID)
34  return true;
35  }
36  return false;
37 }
38 
39 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
40  BitVector &PressureSets) const {
41  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
42  const int *PSets = getRegUnitPressureSets(*U);
43  if (hasPressureSet(PSets, PSetID)) {
44  PressureSets.set(PSetID);
45  break;
46  }
47  }
48 }
49 
50 static cl::opt<bool> EnableSpillSGPRToSMEM(
51  "amdgpu-spill-sgpr-to-smem",
52  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
53  cl::init(false));
54 
55 static cl::opt<bool> EnableSpillSGPRToVGPR(
56  "amdgpu-spill-sgpr-to-vgpr",
57  cl::desc("Enable spilling SGPRs to VGPRs"),
58  cl::ReallyHidden,
59  cl::init(true));
60 
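// The constructor picks the SGPR spill strategy from the flags above (SMEM
// spilling additionally requires scalar-store support) and records which
// register pressure sets contain SGPRs and VGPRs, keeping the largest of each
// kind as the representative SGPRSetID/VGPRSetID.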
61 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
62  AMDGPURegisterInfo(),
63  SGPRPressureSets(getNumRegPressureSets()),
64  VGPRPressureSets(getNumRegPressureSets()),
65  SpillSGPRToVGPR(false),
66  SpillSGPRToSMEM(false) {
67  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
68  SpillSGPRToSMEM = true;
69  else if (EnableSpillSGPRToVGPR)
70  SpillSGPRToVGPR = true;
71 
72  unsigned NumRegPressureSets = getNumRegPressureSets();
73 
74  SGPRSetID = NumRegPressureSets;
75  VGPRSetID = NumRegPressureSets;
76 
77  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
78  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
79  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
80  }
81 
82  // Determine the number of reg units for each pressure set.
83  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
84  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
85  const int *PSets = getRegUnitPressureSets(i);
86  for (unsigned j = 0; PSets[j] != -1; ++j) {
87  ++PressureSetRegUnits[PSets[j]];
88  }
89  }
90 
91  unsigned VGPRMax = 0, SGPRMax = 0;
92  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
93  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
94  VGPRSetID = i;
95  VGPRMax = PressureSetRegUnits[i];
96  continue;
97  }
98  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
99  SGPRSetID = i;
100  SGPRMax = PressureSetRegUnits[i];
101  }
102  }
103 
104  assert(SGPRSetID < NumRegPressureSets &&
105  VGPRSetID < NumRegPressureSets);
106 }
107 
108 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
109  const MachineFunction &MF) const {
110 
111  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
112  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
113  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
114  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
115 }
116 
117 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
118  unsigned Reg;
119 
120  // Try to place it in a hole after PrivateSegmentBufferReg.
121  if (RegCount & 3) {
122  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
123  // alignment constraints, so we have a hole where we can put the wave offset.
124  Reg = RegCount - 1;
125  } else {
126  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
127  // wave offset before it.
128  Reg = RegCount - 5;
129  }
130 
131  return Reg;
132 }
133 
134 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
135  const MachineFunction &MF) const {
136  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
137  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
138  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
139 }
140 
141 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
142  const MachineFunction &MF) const {
143  return AMDGPU::SGPR32;
144 }
145 
146 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
147  BitVector Reserved(getNumRegs());
148 
149  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
150  // this seems likely to result in bugs, so I'm marking them as reserved.
151  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
152  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
153 
154  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
155  reserveRegisterTuples(Reserved, AMDGPU::M0);
156 
157  // Reserve the memory aperture registers.
158  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
160  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
161  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
162 
163  // Reserve xnack_mask registers - support is not implemented in Codegen.
164  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
165 
166  // Reserve Trap Handler registers - support is not implemented in Codegen.
167  reserveRegisterTuples(Reserved, AMDGPU::TBA);
168  reserveRegisterTuples(Reserved, AMDGPU::TMA);
169  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
170  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
171  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
172  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
173  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
174  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
175  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
176  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
177 
178  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
179 
180  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
181  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
182  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
183  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
184  reserveRegisterTuples(Reserved, Reg);
185  }
186 
187  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
188  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
189  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
190  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
191  reserveRegisterTuples(Reserved, Reg);
192  }
193 
194  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
195 
196  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
197  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
198  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
199  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
200  }
201 
202  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
203  if (ScratchRSrcReg != AMDGPU::NoRegister) {
204  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
205  // to spill.
206  // TODO: May need to reserve a VGPR if doing LDS spilling.
207  reserveRegisterTuples(Reserved, ScratchRSrcReg);
208  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
209  }
210 
211  // We have to assume the SP is needed in case there are calls in the function,
212  // which is detected after the function is lowered. If we aren't really going
213  // to need SP, don't bother reserving it.
214  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
215 
216  if (StackPtrReg != AMDGPU::NoRegister) {
217  reserveRegisterTuples(Reserved, StackPtrReg);
218  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
219  }
220 
221  unsigned FrameReg = MFI->getFrameOffsetReg();
222  if (FrameReg != AMDGPU::NoRegister) {
223  reserveRegisterTuples(Reserved, FrameReg);
224  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
225  }
226 
227  return Reserved;
228 }
229 
230 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
231  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
232  if (Info->isEntryFunction()) {
233  const MachineFrameInfo &MFI = Fn.getFrameInfo();
234  return MFI.hasStackObjects() || MFI.hasCalls();
235  }
236 
237  // May need scavenger for dealing with callee saved registers.
238  return true;
239 }
240 
241 bool SIRegisterInfo::requiresFrameIndexScavenging(
242  const MachineFunction &MF) const {
243  const MachineFrameInfo &MFI = MF.getFrameInfo();
244  if (MFI.hasStackObjects())
245  return true;
246 
247  // May need to deal with callee saved registers.
248  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
249  return !Info->isEntryFunction();
250 }
251 
252 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
253  const MachineFunction &MF) const {
254  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
255  // create a virtual register for it during frame index elimination, so the
256  // scavenger is directly needed.
257  return MF.getFrameInfo().hasStackObjects() &&
258  MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
259  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
260 }
261 
262 bool SIRegisterInfo::requiresVirtualBaseRegisters(
263  const MachineFunction &) const {
264  // There are no special dedicated stack or frame pointers.
265  return true;
266 }
267 
268 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
269  // This helps catch bugs as verifier errors.
270  return true;
271 }
272 
273 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
274  assert(SIInstrInfo::isMUBUF(*MI));
275 
276  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
277  AMDGPU::OpName::offset);
278  return MI->getOperand(OffIdx).getImm();
279 }
280 
281 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
282  int Idx) const {
283  if (!SIInstrInfo::isMUBUF(*MI))
284  return 0;
285 
286  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
287  AMDGPU::OpName::vaddr) &&
288  "Should never see frame index on non-address operand");
289 
290  return getMUBUFInstrOffset(MI);
291 }
292 
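// A frame base register is only needed for a MUBUF access whose combined
// offset would not fit in the 12-bit unsigned immediate field.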
293 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
294  if (!MI->mayLoadOrStore())
295  return false;
296 
297  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
298 
299  return !isUInt<12>(FullOffset);
300 }
301 
302 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
303  unsigned BaseReg,
304  int FrameIdx,
305  int64_t Offset) const {
306  MachineBasicBlock::iterator Ins = MBB->begin();
307  DebugLoc DL; // Defaults to "unknown"
308 
309  if (Ins != MBB->end())
310  DL = Ins->getDebugLoc();
311 
312  MachineFunction *MF = MBB->getParent();
313  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
314  const SIInstrInfo *TII = Subtarget.getInstrInfo();
315 
316  if (Offset == 0) {
317  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
318  .addFrameIndex(FrameIdx);
319  return;
320  }
321 
322  MachineRegisterInfo &MRI = MF->getRegInfo();
323  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
324 
325  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
326 
327  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
328  .addImm(Offset);
329  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
330  .addFrameIndex(FrameIdx);
331 
332  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
333  .addReg(OffsetReg, RegState::Kill)
334  .addReg(FIReg);
335 }
336 
337 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
338  int64_t Offset) const {
339 
340  MachineBasicBlock *MBB = MI.getParent();
341  MachineFunction *MF = MBB->getParent();
342  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
343  const SIInstrInfo *TII = Subtarget.getInstrInfo();
344 
345 #ifndef NDEBUG
346  // FIXME: Is it possible to be storing a frame index to itself?
347  bool SeenFI = false;
348  for (const MachineOperand &MO: MI.operands()) {
349  if (MO.isFI()) {
350  if (SeenFI)
351  llvm_unreachable("should not see multiple frame indices");
352 
353  SeenFI = true;
354  }
355  }
356 #endif
357 
358  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
359  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
360  assert(TII->isMUBUF(MI));
361  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
362  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
363  "should only be seeing frame offset relative FrameIndex");
364 
365 
366  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
367  int64_t NewOffset = OffsetOp->getImm() + Offset;
368  assert(isUInt<12>(NewOffset) && "offset should be legal");
369 
370  FIOp->ChangeToRegister(BaseReg, false);
371  OffsetOp->setImm(NewOffset);
372 }
373 
374 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
375  unsigned BaseReg,
376  int64_t Offset) const {
377  if (!SIInstrInfo::isMUBUF(*MI))
378  return false;
379 
380  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
381 
382  return isUInt<12>(NewOffset);
383 }
384 
385 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
386  const MachineFunction &MF, unsigned Kind) const {
387  // This is inaccurate. It depends on the instruction and address space. The
388  // only place where we should hit this is for dealing with frame indexes /
389  // private accesses, so this is correct in that case.
390  return &AMDGPU::VGPR_32RegClass;
391 }
392 
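// Returns the number of 32-bit subregisters covered by a spill pseudo, i.e.
// the number of 4-byte scratch accesses it expands to.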
393 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
394 
395  switch (Op) {
396  case AMDGPU::SI_SPILL_S512_SAVE:
397  case AMDGPU::SI_SPILL_S512_RESTORE:
398  case AMDGPU::SI_SPILL_V512_SAVE:
399  case AMDGPU::SI_SPILL_V512_RESTORE:
400  return 16;
401  case AMDGPU::SI_SPILL_S256_SAVE:
402  case AMDGPU::SI_SPILL_S256_RESTORE:
403  case AMDGPU::SI_SPILL_V256_SAVE:
404  case AMDGPU::SI_SPILL_V256_RESTORE:
405  return 8;
406  case AMDGPU::SI_SPILL_S128_SAVE:
407  case AMDGPU::SI_SPILL_S128_RESTORE:
408  case AMDGPU::SI_SPILL_V128_SAVE:
409  case AMDGPU::SI_SPILL_V128_RESTORE:
410  return 4;
411  case AMDGPU::SI_SPILL_V96_SAVE:
412  case AMDGPU::SI_SPILL_V96_RESTORE:
413  return 3;
414  case AMDGPU::SI_SPILL_S64_SAVE:
415  case AMDGPU::SI_SPILL_S64_RESTORE:
416  case AMDGPU::SI_SPILL_V64_SAVE:
417  case AMDGPU::SI_SPILL_V64_RESTORE:
418  return 2;
419  case AMDGPU::SI_SPILL_S32_SAVE:
420  case AMDGPU::SI_SPILL_S32_RESTORE:
421  case AMDGPU::SI_SPILL_V32_SAVE:
422  case AMDGPU::SI_SPILL_V32_RESTORE:
423  return 1;
424  default: llvm_unreachable("Invalid spill opcode");
425  }
426 }
427 
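// Maps an _OFFEN MUBUF store opcode to its _OFFSET form (no VGPR address
// operand), or returns -1 if there is no such form.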
428 static int getOffsetMUBUFStore(unsigned Opc) {
429  switch (Opc) {
430  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
431  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
432  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
433  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
434  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
435  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
436  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
437  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
438  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
439  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
440  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
441  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
442  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
443  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
444  default:
445  return -1;
446  }
447 }
448 
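// Same mapping as above, for MUBUF loads.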
449 static int getOffsetMUBUFLoad(unsigned Opc) {
450  switch (Opc) {
451  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
452  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
453  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
454  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
455  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
456  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
457  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
458  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
459  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
460  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
461  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
462  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
463  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
464  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
465  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
466  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
467  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
468  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
469  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
470  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
471  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
472  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
473  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
474  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
475  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
476  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
477  default:
478  return -1;
479  }
480 }
481 
482 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
483 // need to handle the case where an SGPR may need to be spilled while spilling.
484 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
485  MachineFrameInfo &MFI,
486  MachineBasicBlock::iterator MI,
487  int Index,
488  int64_t Offset) {
489  MachineBasicBlock *MBB = MI->getParent();
490  const DebugLoc &DL = MI->getDebugLoc();
491  bool IsStore = MI->mayStore();
492 
493  unsigned Opc = MI->getOpcode();
494  int LoadStoreOp = IsStore ?
495  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
496  if (LoadStoreOp == -1)
497  return false;
498 
499  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
500  MachineInstrBuilder NewMI =
501  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
502  .add(*Reg)
503  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
504  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
505  .addImm(Offset)
506  .addImm(0) // glc
507  .addImm(0) // slc
508  .addImm(0) // tfe
509  .cloneMemRefs(*MI);
510 
511  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
512  AMDGPU::OpName::vdata_in);
513  if (VDataIn)
514  NewMI.add(*VDataIn);
515  return true;
516 }
517 
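/// Expand a VGPR spill pseudo into the MUBUF loads/stores that access the
/// scratch buffer, one 4-byte element per 32-bit subregister of ValueReg.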
518 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
519  unsigned LoadStoreOp,
520  int Index,
521  unsigned ValueReg,
522  bool IsKill,
523  unsigned ScratchRsrcReg,
524  unsigned ScratchOffsetReg,
525  int64_t InstOffset,
526  MachineMemOperand *MMO,
527  RegScavenger *RS) const {
528  MachineBasicBlock *MBB = MI->getParent();
529  MachineFunction *MF = MI->getParent()->getParent();
530  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
531  const SIInstrInfo *TII = ST.getInstrInfo();
532  const MachineFrameInfo &MFI = MF->getFrameInfo();
533 
534  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
535  const DebugLoc &DL = MI->getDebugLoc();
536  bool IsStore = Desc.mayStore();
537 
538  bool Scavenged = false;
539  unsigned SOffset = ScratchOffsetReg;
540 
541  const unsigned EltSize = 4;
542  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
543  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
544  unsigned Size = NumSubRegs * EltSize;
545  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
546  int64_t ScratchOffsetRegDelta = 0;
547 
548  unsigned Align = MFI.getObjectAlignment(Index);
549  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
550 
551  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
552 
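  // Note: the MUBUF immediate offset field is a 12-bit unsigned value. If the
  // offset of the last element would overflow it, the code below folds the
  // frame offset (scaled by the wavefront size) into an SGPR used as soffset
  // and restarts the immediate offset at 0.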
553  if (!isUInt<12>(Offset + Size - EltSize)) {
554  SOffset = AMDGPU::NoRegister;
555 
556  // We currently only support spilling VGPRs to EltSize boundaries, meaning
557  // we can simplify the adjustment of Offset here to just scale with
558  // WavefrontSize.
559  Offset *= ST.getWavefrontSize();
560 
561  // We don't have access to the register scavenger if this function is called
562  // during PEI::scavengeFrameVirtualRegs().
563  if (RS)
564  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
565 
566  if (SOffset == AMDGPU::NoRegister) {
567  // There are no free SGPRs, and we are already in the process of spilling
568  // VGPRs as well. Since we need a VGPR in order to spill SGPRs (this is true
569  // on SI/CI, and on VI it remains true until spilling via scalar stores is
570  // implemented), we have no way to free up an SGPR. Our solution here is to
571  // add the offset directly to the ScratchOffset register, and then subtract
572  // the offset after the spill to return ScratchOffset to its original
573  // value.
574  SOffset = ScratchOffsetReg;
575  ScratchOffsetRegDelta = Offset;
576  } else {
577  Scavenged = true;
578  }
579 
580  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
581  .addReg(ScratchOffsetReg)
582  .addImm(Offset);
583 
584  Offset = 0;
585  }
586 
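  // Emit one EltSize (4-byte) scratch access per 32-bit subregister; the last
  // access carries the kill / implicit super-register operands.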
587  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
588  unsigned SubReg = NumSubRegs == 1 ?
589  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
590 
591  unsigned SOffsetRegState = 0;
592  unsigned SrcDstRegState = getDefRegState(!IsStore);
593  if (i + 1 == e) {
594  SOffsetRegState |= getKillRegState(Scavenged);
595  // The last implicit use carries the "Kill" flag.
596  SrcDstRegState |= getKillRegState(IsKill);
597  }
598 
599  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
600  MachineMemOperand *NewMMO
601  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
602  EltSize, MinAlign(Align, EltSize * i));
603 
604  auto MIB = BuildMI(*MBB, MI, DL, Desc)
605  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
606  .addReg(ScratchRsrcReg)
607  .addReg(SOffset, SOffsetRegState)
608  .addImm(Offset)
609  .addImm(0) // glc
610  .addImm(0) // slc
611  .addImm(0) // tfe
612  .addMemOperand(NewMMO);
613 
614  if (NumSubRegs > 1)
615  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
616  }
617 
618  if (ScratchOffsetRegDelta != 0) {
619  // Subtract the offset we added to the ScratchOffset register.
620  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
621  .addReg(ScratchOffsetReg)
622  .addImm(ScratchOffsetRegDelta);
623  }
624 }
625 
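// Picks the widest scalar buffer load/store (4, 2, or 1 dwords) that evenly
// divides the spilled super-register, returning the element size in bytes and
// the corresponding opcode.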
626 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
627  bool Store) {
628  if (SuperRegSize % 16 == 0) {
629  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
630  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
631  }
632 
633  if (SuperRegSize % 8 == 0) {
634  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
635  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
636  }
637 
638  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
639  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
640 }
641 
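/// Lower an SI_SPILL_S*_SAVE pseudo. Each 32-bit subregister is either written
/// into a lane of a reserved VGPR with V_WRITELANE_B32, stored to scratch with
/// a scalar store (SMEM path), or copied through a temporary VGPR and spilled
/// to a real stack slot as a last resort.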
642 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
643  int Index,
644  RegScavenger *RS,
645  bool OnlyToVGPR) const {
646  MachineBasicBlock *MBB = MI->getParent();
647  MachineFunction *MF = MBB->getParent();
648  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
649  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
650 
651  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
652  = MFI->getSGPRToVGPRSpills(Index);
653  bool SpillToVGPR = !VGPRSpills.empty();
654  if (OnlyToVGPR && !SpillToVGPR)
655  return false;
656 
657  MachineRegisterInfo &MRI = MF->getRegInfo();
658  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
659  const SIInstrInfo *TII = ST.getInstrInfo();
660 
661  unsigned SuperReg = MI->getOperand(0).getReg();
662  bool IsKill = MI->getOperand(0).isKill();
663  const DebugLoc &DL = MI->getDebugLoc();
664 
665  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
666 
667  bool SpillToSMEM = spillSGPRToSMEM();
668  if (SpillToSMEM && OnlyToVGPR)
669  return false;
670 
671  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
672  SuperReg != MFI->getFrameOffsetReg() &&
673  SuperReg != MFI->getScratchWaveOffsetReg()));
674 
675  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
676 
677  unsigned OffsetReg = AMDGPU::M0;
678  unsigned M0CopyReg = AMDGPU::NoRegister;
679 
680  if (SpillToSMEM) {
681  if (RS->isRegUsed(AMDGPU::M0)) {
682  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
683  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
684  .addReg(AMDGPU::M0);
685  }
686  }
687 
688  unsigned ScalarStoreOp;
689  unsigned EltSize = 4;
690  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
691  if (SpillToSMEM && isSGPRClass(RC)) {
692  // XXX - if private_element_size is larger than 4 it might be useful to be
693  // able to spill wider vmem spills.
694  std::tie(EltSize, ScalarStoreOp) =
695  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
696  }
697 
698  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
699  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
700 
701  // SubReg carries the "Kill" flag when SubReg == SuperReg.
702  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
703  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
704  unsigned SubReg = NumSubRegs == 1 ?
705  SuperReg : getSubReg(SuperReg, SplitParts[i]);
706 
707  if (SpillToSMEM) {
708  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
709 
710  // The allocated memory size is really the wavefront size * the frame
711  // index size. The widest register class is 64 bytes, so a 4-byte scratch
712  // allocation is enough to spill this in a single stack object.
713  //
714  // FIXME: Frame size/offsets are computed earlier than this, so the extra
715  // space is still unnecessarily allocated.
716 
717  unsigned Align = FrameInfo.getObjectAlignment(Index);
718  MachinePointerInfo PtrInfo
719  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
720  MachineMemOperand *MMO
721  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
722  EltSize, MinAlign(Align, EltSize * i));
723 
724  // SMEM instructions only support a single offset, so increment the wave
725  // offset.
726 
727  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
728  if (Offset != 0) {
729  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
730  .addReg(MFI->getFrameOffsetReg())
731  .addImm(Offset);
732  } else {
733  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
734  .addReg(MFI->getFrameOffsetReg());
735  }
736 
737  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
738  .addReg(SubReg, getKillRegState(IsKill)) // sdata
739  .addReg(MFI->getScratchRSrcReg()) // sbase
740  .addReg(OffsetReg, RegState::Kill) // soff
741  .addImm(0) // glc
742  .addMemOperand(MMO);
743 
744  continue;
745  }
746 
747  if (SpillToVGPR) {
748  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
749 
750  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
751  // only circumstance in which we say it is undefined is when it is the
752  // first spill to this VGPR in the first basic block.
753  bool VGPRDefined = true;
754  if (MBB == &MF->front())
755  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
756 
757  // Mark the "old value of vgpr" input undef only if this is the first sgpr
758  // spill to this specific vgpr in the first basic block.
759  BuildMI(*MBB, MI, DL,
760  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
761  Spill.VGPR)
762  .addReg(SubReg, getKillRegState(IsKill))
763  .addImm(Spill.Lane)
764  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
765 
766  // FIXME: Since this spills to another register instead of an actual
767  // frame index, we should delete the frame index when all references to
768  // it are fixed.
769  } else {
770  // XXX - Can the spill to VGPR fail for some subregisters but not others?
771  if (OnlyToVGPR)
772  return false;
773 
774  // Spill SGPR to a frame index.
775  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
776  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
778 
779  MachineInstrBuilder Mov
780  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
781  .addReg(SubReg, SubKillState);
782 
783 
784  // There could be undef components of a spilled super register.
785  // TODO: Can we detect this and skip the spill?
786  if (NumSubRegs > 1) {
787  // The last implicit use of the SuperReg carries the "Kill" flag.
788  unsigned SuperKillState = 0;
789  if (i + 1 == e)
790  SuperKillState |= getKillRegState(IsKill);
791  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
792  }
793 
794  unsigned Align = FrameInfo.getObjectAlignment(Index);
795  MachinePointerInfo PtrInfo
796  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
797  MachineMemOperand *MMO
798  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
799  EltSize, MinAlign(Align, EltSize * i));
800  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
801  .addReg(TmpReg, RegState::Kill) // src
802  .addFrameIndex(Index) // vaddr
803  .addReg(MFI->getScratchRSrcReg()) // srsrc
804  .addReg(MFI->getFrameOffsetReg()) // soffset
805  .addImm(i * 4) // offset
806  .addMemOperand(MMO);
807  }
808  }
809 
810  if (M0CopyReg != AMDGPU::NoRegister) {
811  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
812  .addReg(M0CopyReg, RegState::Kill);
813  }
814 
815  MI->eraseFromParent();
816  MFI->addToSpilledSGPRs(NumSubRegs);
817  return true;
818 }
819 
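/// Lower an SI_SPILL_S*_RESTORE pseudo: the inverse of spillSGPR, reading lanes
/// back with V_READLANE_B32, reloading with scalar loads (SMEM path), or going
/// through a temporary VGPR and V_READFIRSTLANE_B32 for real stack slots.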
820 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
821  int Index,
822  RegScavenger *RS,
823  bool OnlyToVGPR) const {
824  MachineFunction *MF = MI->getParent()->getParent();
825  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
826  MachineBasicBlock *MBB = MI->getParent();
827  MachineRegisterInfo &MRI = MF->getRegInfo();
828 
829  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
830  = MFI->getSGPRToVGPRSpills(Index);
831  bool SpillToVGPR = !VGPRSpills.empty();
832  if (OnlyToVGPR && !SpillToVGPR)
833  return false;
834 
835  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
836  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
837  const SIInstrInfo *TII = ST.getInstrInfo();
838  const DebugLoc &DL = MI->getDebugLoc();
839 
840  unsigned SuperReg = MI->getOperand(0).getReg();
841  bool SpillToSMEM = spillSGPRToSMEM();
842  if (SpillToSMEM && OnlyToVGPR)
843  return false;
844 
845  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
846 
847  unsigned OffsetReg = AMDGPU::M0;
848  unsigned M0CopyReg = AMDGPU::NoRegister;
849 
850  if (SpillToSMEM) {
851  if (RS->isRegUsed(AMDGPU::M0)) {
852  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
853  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
854  .addReg(AMDGPU::M0);
855  }
856  }
857 
858  unsigned EltSize = 4;
859  unsigned ScalarLoadOp;
860 
861  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
862  if (SpillToSMEM && isSGPRClass(RC)) {
863  // XXX - if private_element_size is larger than 4 it might be useful to be
864  // able to spill wider vmem spills.
865  std::tie(EltSize, ScalarLoadOp) =
866  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
867  }
868 
869  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
870  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
871 
872  // SubReg carries the "Kill" flag when SubReg == SuperReg.
873  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
874 
875  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
876  unsigned SubReg = NumSubRegs == 1 ?
877  SuperReg : getSubReg(SuperReg, SplitParts[i]);
878 
879  if (SpillToSMEM) {
880  // FIXME: Size may be > 4, in which case the extra bytes are wasted.
881  unsigned Align = FrameInfo.getObjectAlignment(Index);
882  MachinePointerInfo PtrInfo
883  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
884  MachineMemOperand *MMO
885  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
886  EltSize, MinAlign(Align, EltSize * i));
887 
888  // Add i * 4 offset
889  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
890  if (Offset != 0) {
891  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
892  .addReg(MFI->getFrameOffsetReg())
893  .addImm(Offset);
894  } else {
895  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
896  .addReg(MFI->getFrameOffsetReg());
897  }
898 
899  auto MIB =
900  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
901  .addReg(MFI->getScratchRSrcReg()) // sbase
902  .addReg(OffsetReg, RegState::Kill) // soff
903  .addImm(0) // glc
904  .addMemOperand(MMO);
905 
906  if (NumSubRegs > 1 && i == 0)
907  MIB.addReg(SuperReg, RegState::ImplicitDefine);
908 
909  continue;
910  }
911 
912  if (SpillToVGPR) {
913  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
914  auto MIB =
915  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
916  SubReg)
917  .addReg(Spill.VGPR)
918  .addImm(Spill.Lane);
919 
920  if (NumSubRegs > 1 && i == 0)
921  MIB.addReg(SuperReg, RegState::ImplicitDefine);
922  } else {
923  if (OnlyToVGPR)
924  return false;
925 
926  // Restore SGPR from a stack slot.
927  // FIXME: We should use S_LOAD_DWORD here for VI.
928  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
929  unsigned Align = FrameInfo.getObjectAlignment(Index);
930 
931  MachinePointerInfo PtrInfo
932  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
933 
934  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
935  MachineMemOperand::MOLoad, EltSize,
936  MinAlign(Align, EltSize * i));
937 
938  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
939  .addFrameIndex(Index) // vaddr
940  .addReg(MFI->getScratchRSrcReg()) // srsrc
941  .addReg(MFI->getFrameOffsetReg()) // soffset
942  .addImm(i * 4) // offset
943  .addMemOperand(MMO);
944 
945  auto MIB =
946  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
947  .addReg(TmpReg, RegState::Kill);
948 
949  if (NumSubRegs > 1)
950  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
951  }
952  }
953 
954  if (M0CopyReg != AMDGPU::NoRegister) {
955  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
956  .addReg(M0CopyReg, RegState::Kill);
957  }
958 
959  MI->eraseFromParent();
960  return true;
961 }
962 
963 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
964 /// a VGPR and the stack slot can be safely eliminated when all other users are
965 /// handled.
966 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
967  MachineBasicBlock::iterator MI,
968  int FI,
969  RegScavenger *RS) const {
970  switch (MI->getOpcode()) {
971  case AMDGPU::SI_SPILL_S512_SAVE:
972  case AMDGPU::SI_SPILL_S256_SAVE:
973  case AMDGPU::SI_SPILL_S128_SAVE:
974  case AMDGPU::SI_SPILL_S64_SAVE:
975  case AMDGPU::SI_SPILL_S32_SAVE:
976  return spillSGPR(MI, FI, RS, true);
977  case AMDGPU::SI_SPILL_S512_RESTORE:
978  case AMDGPU::SI_SPILL_S256_RESTORE:
979  case AMDGPU::SI_SPILL_S128_RESTORE:
980  case AMDGPU::SI_SPILL_S64_RESTORE:
981  case AMDGPU::SI_SPILL_S32_RESTORE:
982  return restoreSGPR(MI, FI, RS, true);
983  default:
984  llvm_unreachable("not an SGPR spill instruction");
985  }
986 }
987 
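/// Rewrite the frame index operand of MI: SGPR and VGPR spill pseudos are
/// expanded via spillSGPR/restoreSGPR/buildSpillLoadStore; MUBUF accesses fold
/// the object offset into their immediate when it fits; anything else
/// materializes the absolute offset into a register.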
988 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
989  int SPAdj, unsigned FIOperandNum,
990  RegScavenger *RS) const {
991  MachineFunction *MF = MI->getParent()->getParent();
992  MachineRegisterInfo &MRI = MF->getRegInfo();
993  MachineBasicBlock *MBB = MI->getParent();
994  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
995  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
996  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
997  const SIInstrInfo *TII = ST.getInstrInfo();
998  DebugLoc DL = MI->getDebugLoc();
999 
1000  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1001  int Index = MI->getOperand(FIOperandNum).getIndex();
1002 
1003  switch (MI->getOpcode()) {
1004  // SGPR register spill
1005  case AMDGPU::SI_SPILL_S512_SAVE:
1006  case AMDGPU::SI_SPILL_S256_SAVE:
1007  case AMDGPU::SI_SPILL_S128_SAVE:
1008  case AMDGPU::SI_SPILL_S64_SAVE:
1009  case AMDGPU::SI_SPILL_S32_SAVE: {
1010  spillSGPR(MI, Index, RS);
1011  break;
1012  }
1013 
1014  // SGPR register restore
1015  case AMDGPU::SI_SPILL_S512_RESTORE:
1016  case AMDGPU::SI_SPILL_S256_RESTORE:
1017  case AMDGPU::SI_SPILL_S128_RESTORE:
1018  case AMDGPU::SI_SPILL_S64_RESTORE:
1019  case AMDGPU::SI_SPILL_S32_RESTORE: {
1020  restoreSGPR(MI, Index, RS);
1021  break;
1022  }
1023 
1024  // VGPR register spill
1025  case AMDGPU::SI_SPILL_V512_SAVE:
1026  case AMDGPU::SI_SPILL_V256_SAVE:
1027  case AMDGPU::SI_SPILL_V128_SAVE:
1028  case AMDGPU::SI_SPILL_V96_SAVE:
1029  case AMDGPU::SI_SPILL_V64_SAVE:
1030  case AMDGPU::SI_SPILL_V32_SAVE: {
1031  const MachineOperand *VData = TII->getNamedOperand(*MI,
1032  AMDGPU::OpName::vdata);
1033  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1034  Index,
1035  VData->getReg(), VData->isKill(),
1036  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1037  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1038  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1039  *MI->memoperands_begin(),
1040  RS);
1041  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1042  MI->eraseFromParent();
1043  break;
1044  }
1045  case AMDGPU::SI_SPILL_V32_RESTORE:
1046  case AMDGPU::SI_SPILL_V64_RESTORE:
1047  case AMDGPU::SI_SPILL_V96_RESTORE:
1048  case AMDGPU::SI_SPILL_V128_RESTORE:
1049  case AMDGPU::SI_SPILL_V256_RESTORE:
1050  case AMDGPU::SI_SPILL_V512_RESTORE: {
1051  const MachineOperand *VData = TII->getNamedOperand(*MI,
1052  AMDGPU::OpName::vdata);
1053 
1054  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1055  Index,
1056  VData->getReg(), VData->isKill(),
1057  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1058  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1059  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1060  *MI->memoperands_begin(),
1061  RS);
1062  MI->eraseFromParent();
1063  break;
1064  }
1065 
1066  default: {
1067  const DebugLoc &DL = MI->getDebugLoc();
1068  bool IsMUBUF = TII->isMUBUF(*MI);
1069 
1070  if (!IsMUBUF &&
1071  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1072  // Convert to an absolute stack address by finding the offset from the
1073  // scratch wave base and scaling by the wave size.
1074  //
1075  // In an entry function/kernel the stack address is already the
1076  // absolute address relative to the scratch wave offset.
1077 
1078  unsigned DiffReg
1079  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1080 
1081  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1082  unsigned ResultReg = IsCopy ?
1083  MI->getOperand(0).getReg() :
1084  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1085 
1086  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1087  .addReg(MFI->getFrameOffsetReg())
1088  .addReg(MFI->getScratchWaveOffsetReg());
1089 
1090  int64_t Offset = FrameInfo.getObjectOffset(Index);
1091  if (Offset == 0) {
1092  // XXX - This never happens because of emergency scavenging slot at 0?
1093  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1094  .addImm(Log2_32(ST.getWavefrontSize()))
1095  .addReg(DiffReg);
1096  } else {
1097  unsigned ScaledReg
1098  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1099 
1100  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1101  .addImm(Log2_32(ST.getWavefrontSize()))
1102  .addReg(DiffReg, RegState::Kill);
1103 
1104  // TODO: Fold if use instruction is another add of a constant.
1105  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1106  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1107  .addImm(Offset)
1108  .addReg(ScaledReg, RegState::Kill);
1109  } else {
1110  unsigned ConstOffsetReg
1111  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1112 
1113  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1114  .addImm(Offset);
1115  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1116  .addReg(ConstOffsetReg, RegState::Kill)
1117  .addReg(ScaledReg, RegState::Kill);
1118  }
1119  }
1120 
1121  // Don't introduce an extra copy if we're just materializing in a mov.
1122  if (IsCopy)
1123  MI->eraseFromParent();
1124  else
1125  FIOp.ChangeToRegister(ResultReg, false, false, true);
1126  return;
1127  }
1128 
1129  if (IsMUBUF) {
1130  // Disable offen so we don't need a 0 vgpr base.
1131  assert(static_cast<int>(FIOperandNum) ==
1132  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1133  AMDGPU::OpName::vaddr));
1134 
1135  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1136  == MFI->getFrameOffsetReg());
1137 
1138  int64_t Offset = FrameInfo.getObjectOffset(Index);
1139  int64_t OldImm
1140  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1141  int64_t NewOffset = OldImm + Offset;
1142 
1143  if (isUInt<12>(NewOffset) &&
1144  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1145  MI->eraseFromParent();
1146  return;
1147  }
1148  }
1149 
1150  // If the offset is simply too big, don't convert to a scratch wave offset
1151  // relative index.
1152 
1153  int64_t Offset = FrameInfo.getObjectOffset(Index);
1154  FIOp.ChangeToImmediate(Offset);
1155  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1156  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1157  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1158  .addImm(Offset);
1159  FIOp.ChangeToRegister(TmpReg, false, false, true);
1160  }
1161  }
1162  }
1163 }
1164 
1165 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1166  #define AMDGPU_REG_ASM_NAMES
1167  #include "AMDGPURegAsmNames.inc.cpp"
1168 
1169  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1170  if (Reg >= BeginReg && Reg <= EndReg) { \
1171  unsigned Index = Reg - BeginReg; \
1172  assert(Index < array_lengthof(RegTable)); \
1173  return RegTable[Index]; \
1174  }
1175 
1176  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1177  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1178  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1179  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1180  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1181  VGPR96RegNames);
1182 
1183  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1184  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1185  VGPR128RegNames);
1186  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1187  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1188  SGPR128RegNames);
1189 
1190  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1191  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1192  VGPR256RegNames);
1193 
1194  REG_RANGE(
1195  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1196  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1197  VGPR512RegNames);
1198 
1199  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1200  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1201  SGPR256RegNames);
1202 
1203  REG_RANGE(
1204  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1205  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1206  SGPR512RegNames
1207  );
1208 
1209 #undef REG_RANGE
1210 
1211  // FIXME: Rename flat_scr so we don't need to special case this.
1212  switch (Reg) {
1213  case AMDGPU::FLAT_SCR:
1214  return "flat_scratch";
1215  case AMDGPU::FLAT_SCR_LO:
1216  return "flat_scratch_lo";
1217  case AMDGPU::FLAT_SCR_HI:
1218  return "flat_scratch_hi";
1219  default:
1220  // For the special named registers the default is fine.
1221  return AMDGPURegisterInfo::getRegAsmName(Reg);
1222  }
1223 }
1224 
1225 // FIXME: This is very slow. It might be worth creating a map from physreg to
1226 // register class.
1227 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1228  assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1229 
1230  static const TargetRegisterClass *const BaseClasses[] = {
1231  &AMDGPU::VGPR_32RegClass,
1232  &AMDGPU::SReg_32RegClass,
1233  &AMDGPU::VReg_64RegClass,
1234  &AMDGPU::SReg_64RegClass,
1235  &AMDGPU::VReg_96RegClass,
1236  &AMDGPU::VReg_128RegClass,
1237  &AMDGPU::SReg_128RegClass,
1238  &AMDGPU::VReg_256RegClass,
1239  &AMDGPU::SReg_256RegClass,
1240  &AMDGPU::VReg_512RegClass,
1241  &AMDGPU::SReg_512RegClass,
1242  &AMDGPU::SCC_CLASSRegClass,
1243  &AMDGPU::Pseudo_SReg_32RegClass,
1244  &AMDGPU::Pseudo_SReg_128RegClass,
1245  };
1246 
1247  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1248  if (BaseClass->contains(Reg)) {
1249  return BaseClass;
1250  }
1251  }
1252  return nullptr;
1253 }
1254 
1255 // TODO: It might be helpful to have some target specific flags in
1256 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1257 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1258  unsigned Size = getRegSizeInBits(*RC);
1259  if (Size < 32)
1260  return false;
1261  switch (Size) {
1262  case 32:
1263  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1264  case 64:
1265  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1266  case 96:
1267  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1268  case 128:
1269  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1270  case 256:
1271  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1272  case 512:
1273  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1274  default:
1275  llvm_unreachable("Invalid register class size");
1276  }
1277 }
1278 
1279 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1280  const TargetRegisterClass *SRC) const {
1281  switch (getRegSizeInBits(*SRC)) {
1282  case 32:
1283  return &AMDGPU::VGPR_32RegClass;
1284  case 64:
1285  return &AMDGPU::VReg_64RegClass;
1286  case 96:
1287  return &AMDGPU::VReg_96RegClass;
1288  case 128:
1289  return &AMDGPU::VReg_128RegClass;
1290  case 256:
1291  return &AMDGPU::VReg_256RegClass;
1292  case 512:
1293  return &AMDGPU::VReg_512RegClass;
1294  default:
1295  llvm_unreachable("Invalid register class size");
1296  }
1297 }
1298 
1299 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1300  const TargetRegisterClass *VRC) const {
1301  switch (getRegSizeInBits(*VRC)) {
1302  case 32:
1303  return &AMDGPU::SGPR_32RegClass;
1304  case 64:
1305  return &AMDGPU::SReg_64RegClass;
1306  case 128:
1307  return &AMDGPU::SReg_128RegClass;
1308  case 256:
1309  return &AMDGPU::SReg_256RegClass;
1310  case 512:
1311  return &AMDGPU::SReg_512RegClass;
1312  default:
1313  llvm_unreachable("Invalid register class size");
1314  }
1315 }
1316 
1317 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1318  const TargetRegisterClass *RC, unsigned SubIdx) const {
1319  if (SubIdx == AMDGPU::NoSubRegister)
1320  return RC;
1321 
1322  // We can assume that each lane corresponds to one 32-bit register.
1323  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1324  if (isSGPRClass(RC)) {
1325  switch (Count) {
1326  case 1:
1327  return &AMDGPU::SGPR_32RegClass;
1328  case 2:
1329  return &AMDGPU::SReg_64RegClass;
1330  case 4:
1331  return &AMDGPU::SReg_128RegClass;
1332  case 8:
1333  return &AMDGPU::SReg_256RegClass;
1334  case 16: /* fall-through */
1335  default:
1336  llvm_unreachable("Invalid sub-register class size");
1337  }
1338  } else {
1339  switch (Count) {
1340  case 1:
1341  return &AMDGPU::VGPR_32RegClass;
1342  case 2:
1343  return &AMDGPU::VReg_64RegClass;
1344  case 3:
1345  return &AMDGPU::VReg_96RegClass;
1346  case 4:
1347  return &AMDGPU::VReg_128RegClass;
1348  case 8:
1349  return &AMDGPU::VReg_256RegClass;
1350  case 16: /* fall-through */
1351  default:
1352  llvm_unreachable("Invalid sub-register class size");
1353  }
1354  }
1355 }
1356 
1357 bool SIRegisterInfo::shouldRewriteCopySrc(
1358  const TargetRegisterClass *DefRC,
1359  unsigned DefSubReg,
1360  const TargetRegisterClass *SrcRC,
1361  unsigned SrcSubReg) const {
1362  // We want to prefer the smallest register class possible, so we don't want to
1363  // stop and rewrite on anything that looks like a subregister
1364  // extract. Operations mostly don't care about the super register class, so we
1365  // only want to stop on the most basic of copies between the same register
1366  // class.
1367  //
1368  // e.g. if we have something like
1369  // %0 = ...
1370  // %1 = ...
1371  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1372  // %3 = COPY %2, sub0
1373  //
1374  // We want to look through the COPY to find:
1375  // => %3 = COPY %0
1376 
1377  // Plain copy.
1378  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1379 }
1380 
1381 /// Returns a register that is not used at any point in the function.
1382 /// If all registers are used, then this function will return
1383 /// AMDGPU::NoRegister.
1384 unsigned
1385 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1386  const TargetRegisterClass *RC,
1387  const MachineFunction &MF) const {
1388 
1389  for (unsigned Reg : *RC)
1390  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1391  return Reg;
1392  return AMDGPU::NoRegister;
1393 }
1394 
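// Returns the subregister indices used to split a register of class RC into
// EltSize-byte pieces for spilling; an empty list means the register is
// accessed as a single element.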
1395 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1396  unsigned EltSize) const {
1397  if (EltSize == 4) {
1398  static const int16_t Sub0_15[] = {
1399  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1400  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1401  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1402  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1403  };
1404 
1405  static const int16_t Sub0_7[] = {
1406  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1407  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1408  };
1409 
1410  static const int16_t Sub0_3[] = {
1411  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1412  };
1413 
1414  static const int16_t Sub0_2[] = {
1415  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1416  };
1417 
1418  static const int16_t Sub0_1[] = {
1419  AMDGPU::sub0, AMDGPU::sub1,
1420  };
1421 
1422  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1423  case 32:
1424  return {};
1425  case 64:
1426  return makeArrayRef(Sub0_1);
1427  case 96:
1428  return makeArrayRef(Sub0_2);
1429  case 128:
1430  return makeArrayRef(Sub0_3);
1431  case 256:
1432  return makeArrayRef(Sub0_7);
1433  case 512:
1434  return makeArrayRef(Sub0_15);
1435  default:
1436  llvm_unreachable("unhandled register size");
1437  }
1438  }
1439 
1440  if (EltSize == 8) {
1441  static const int16_t Sub0_15_64[] = {
1442  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1443  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1444  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1445  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1446  };
1447 
1448  static const int16_t Sub0_7_64[] = {
1449  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1450  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1451  };
1452 
1453 
1454  static const int16_t Sub0_3_64[] = {
1455  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1456  };
1457 
1458  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1459  case 64:
1460  return {};
1461  case 128:
1462  return makeArrayRef(Sub0_3_64);
1463  case 256:
1464  return makeArrayRef(Sub0_7_64);
1465  case 512:
1466  return makeArrayRef(Sub0_15_64);
1467  default:
1468  llvm_unreachable("unhandled register size");
1469  }
1470  }
1471 
1472  assert(EltSize == 16 && "unhandled register spill split size");
1473 
1474  static const int16_t Sub0_15_128[] = {
1475  AMDGPU::sub0_sub1_sub2_sub3,
1476  AMDGPU::sub4_sub5_sub6_sub7,
1477  AMDGPU::sub8_sub9_sub10_sub11,
1478  AMDGPU::sub12_sub13_sub14_sub15
1479  };
1480 
1481  static const int16_t Sub0_7_128[] = {
1482  AMDGPU::sub0_sub1_sub2_sub3,
1483  AMDGPU::sub4_sub5_sub6_sub7
1484  };
1485 
1486  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1487  case 128:
1488  return {};
1489  case 256:
1490  return makeArrayRef(Sub0_7_128);
1491  case 512:
1492  return makeArrayRef(Sub0_15_128);
1493  default:
1494  llvm_unreachable("unhandled register size");
1495  }
1496 }
1497 
1498 const TargetRegisterClass*
1499 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1500  unsigned Reg) const {
1501  if (TargetRegisterInfo::isVirtualRegister(Reg))
1502  return MRI.getRegClass(Reg);
1503 
1504  return getPhysRegClass(Reg);
1505 }
1506 
1507 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1508  unsigned Reg) const {
1509  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1510  assert(RC && "Register class for the reg not found");
1511  return hasVGPRs(RC);
1512 }
1513 
1514 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1515  const TargetRegisterClass *SrcRC,
1516  unsigned SubReg,
1517  const TargetRegisterClass *DstRC,
1518  unsigned DstSubReg,
1519  const TargetRegisterClass *NewRC,
1520  LiveIntervals &LIS) const {
1521  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1522  unsigned DstSize = getRegSizeInBits(*DstRC);
1523  unsigned NewSize = getRegSizeInBits(*NewRC);
1524 
1525  // Do not increase the size of registers beyond a dword; doing so would
1526  // require allocating adjacent registers and constrain regalloc more than
1527  // needed.
1527 
1528  // Always allow dword coalescing.
1529  if (SrcSize <= 32 || DstSize <= 32)
1530  return true;
1531 
1532  return NewSize <= DstSize || NewSize <= SrcSize;
1533 }
1534 
1535 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1536  MachineFunction &MF) const {
1537 
1538  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1539  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1540 
1541  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1542  MF.getFunction());
1543  switch (RC->getID()) {
1544  default:
1545  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1546  case AMDGPU::VGPR_32RegClassID:
1547  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1548  case AMDGPU::SGPR_32RegClassID:
1549  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1550  }
1551 }
1552 
1553 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1554  unsigned Idx) const {
1555  if (Idx == getVGPRPressureSet())
1556  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1557  const_cast<MachineFunction &>(MF));
1558 
1559  if (Idx == getSGPRPressureSet())
1560  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1561  const_cast<MachineFunction &>(MF));
1562 
1563  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1564 }
1565 
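// m0's register units report no pressure sets: m0 is reserved and never
// allocated, so it should not contribute to register pressure.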
1566 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1567  static const int Empty[] = { -1 };
1568 
1569  if (hasRegUnit(AMDGPU::M0, RegUnit))
1570  return Empty;
1571  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1572 }
1573 
1574 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1575  // Not a callee saved register.
1576  return AMDGPU::SGPR30_SGPR31;
1577 }
1578 
1579 const TargetRegisterClass *
1580 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1581  const MachineRegisterInfo &MRI) const {
1582  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1583  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1584  if (!RB)
1585  return nullptr;
1586 
1587  switch (Size) {
1588  case 32:
1589  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1590  &AMDGPU::SReg_32_XM0RegClass;
1591  case 64:
1592  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1593  &AMDGPU::SReg_64_XEXECRegClass;
1594  case 96:
1595  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1596  nullptr;
1597  case 128:
1598  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1599  &AMDGPU::SReg_128RegClass;
1600  default:
1601  llvm_unreachable("not implemented");
1602  }
1603 }
1604 
1605 // Find reaching register definition
1606 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1607  MachineInstr &Use,
1608  MachineRegisterInfo &MRI,
1609  LiveIntervals *LIS) const {
1610  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1611  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1612  SlotIndex DefIdx;
1613 
1614  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1615  if (!LIS->hasInterval(Reg))
1616  return nullptr;
1617  LiveInterval &LI = LIS->getInterval(Reg);
1618  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1619  : MRI.getMaxLaneMaskForVReg(Reg);
1620  VNInfo *V = nullptr;
1621  if (LI.hasSubRanges()) {
1622  for (auto &S : LI.subranges()) {
1623  if ((S.LaneMask & SubLanes) == SubLanes) {
1624  V = S.getVNInfoAt(UseIdx);
1625  break;
1626  }
1627  }
1628  } else {
1629  V = LI.getVNInfoAt(UseIdx);
1630  }
1631  if (!V)
1632  return nullptr;
1633  DefIdx = V->def;
1634  } else {
1635  // Find last def.
1636  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1637  LiveRange &LR = LIS->getRegUnit(*Units);
1638  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1639  if (!DefIdx.isValid() ||
1640  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1641  LIS->getInstructionFromIndex(V->def)))
1642  DefIdx = V->def;
1643  } else {
1644  return nullptr;
1645  }
1646  }
1647  }
1648 
1649  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
1650 
1651  if (!Def || !MDT.dominates(Def, &Use))
1652  return nullptr;
1653 
1654  assert(Def->modifiesRegister(Reg, this));
1655 
1656  return Def;
1657 }
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:418
unsigned FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isSGPRPressureSet(unsigned SetID) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if this.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
#define REG_RANGE(BeginReg, EndReg, RegTable)
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:408
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
unsigned getReturnAddressReg(const MachineFunction &MF) const
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getSGPRPressureSet() const
const RegisterBank * getRegBankOrNull(unsigned Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
const MachineBasicBlock & front() const
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
LiveInterval & getInterval(unsigned Reg)
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:404
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool hasInv2PiInlineImm() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
void reserveRegisterTuples(BitVector &, unsigned Reg) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value,.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
uint32_t Size
Definition: Profile.cpp:46
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
const unsigned Kind
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the &#39;base&#39; register class for this register.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
bool hasCalls() const
Return true if the current function has any function calls.