LLVM 7.0.0svn
SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/LLVMContext.h"
26 
27 using namespace llvm;
28 
29 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
30  for (unsigned i = 0; PSets[i] != -1; ++i) {
31  if (PSets[i] == (int)PSetID)
32  return true;
33  }
34  return false;
35 }
36 
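// Set the bit for PSetID in PressureSets if any register unit of Reg belongs
// to that pressure set. The constructor below uses this to work out which
// pressure sets track SGPRs and which track VGPRs.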
37 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
38  BitVector &PressureSets) const {
39  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
40  const int *PSets = getRegUnitPressureSets(*U);
41  if (hasPressureSet(PSets, PSetID)) {
42  PressureSets.set(PSetID);
43  break;
44  }
45  }
46 }
47 
49  "amdgpu-spill-sgpr-to-smem",
50  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
51  cl::init(false));
52 
54  "amdgpu-spill-sgpr-to-vgpr",
55  cl::desc("Enable spilling VGPRs to SGPRs"),
57  cl::init(true));
58 
59 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
60  AMDGPURegisterInfo(),
61  SGPRPressureSets(getNumRegPressureSets()),
62  VGPRPressureSets(getNumRegPressureSets()),
63  SpillSGPRToVGPR(false),
64  SpillSGPRToSMEM(false) {
65  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
66  SpillSGPRToSMEM = true;
67  else if (EnableSpillSGPRToVGPR)
68  SpillSGPRToVGPR = true;
69 
70  unsigned NumRegPressureSets = getNumRegPressureSets();
71 
72  SGPRSetID = NumRegPressureSets;
73  VGPRSetID = NumRegPressureSets;
74 
75  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
76  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
77  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
78  }
79 
80  // Determine the number of reg units for each pressure set.
81  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
82  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
83  const int *PSets = getRegUnitPressureSets(i);
84  for (unsigned j = 0; PSets[j] != -1; ++j) {
85  ++PressureSetRegUnits[PSets[j]];
86  }
87  }
88 
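  // Among the pressure sets that contain SGPR0 (resp. VGPR0), pick the one
  // covering the most register units; that set is then treated as the
  // canonical SGPR (VGPR) pressure set.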
89  unsigned VGPRMax = 0, SGPRMax = 0;
90  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
91  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
92  VGPRSetID = i;
93  VGPRMax = PressureSetRegUnits[i];
94  continue;
95  }
96  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
97  SGPRSetID = i;
98  SGPRMax = PressureSetRegUnits[i];
99  }
100  }
101 
102  assert(SGPRSetID < NumRegPressureSets &&
103  VGPRSetID < NumRegPressureSets);
104 }
105 
106 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
107  const MachineFunction &MF) const {
108 
109  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
111  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
112  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
113 }
114 
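// Worked example: with 102 addressable SGPRs the count is not a multiple of 4,
// so the wave offset lands in SGPR101 (RegCount - 1), in the hole above the
// 4-aligned resource descriptor; with 104 it lands in SGPR99 (RegCount - 5),
// directly below the descriptor at SGPR100-SGPR103.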
115 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
116  unsigned Reg;
117 
118  // Try to place it in a hole after PrivateSegmentBufferReg.
119  if (RegCount & 3) {
120  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
121  // alignment constraints, so we have a hole where we can put the wave offset.
122  Reg = RegCount - 1;
123  } else {
124  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
125  // wave offset before it.
126  Reg = RegCount - 5;
127  }
128 
129  return Reg;
130 }
131 
132 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
133  const MachineFunction &MF) const {
134  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
135  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
136  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
137 }
138 
139 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
140  const MachineFunction &MF) const {
141  return AMDGPU::SGPR32;
142 }
143 
144 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
145  BitVector Reserved(getNumRegs());
146 
147  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
148  // this seems likely to result in bugs, so I'm marking them as reserved.
149  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
150  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
151 
152  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
153  reserveRegisterTuples(Reserved, AMDGPU::M0);
154 
155  // Reserve the memory aperture registers.
156  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
157  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
158  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
160 
161  // Reserve xnack_mask registers - support is not implemented in Codegen.
162  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
163 
164  // Reserve Trap Handler registers - support is not implemented in Codegen.
165  reserveRegisterTuples(Reserved, AMDGPU::TBA);
166  reserveRegisterTuples(Reserved, AMDGPU::TMA);
167  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
168  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
169  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
170  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
171  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
172  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
173  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
174  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
175 
176  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
177 
178  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
179  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
180  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
181  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
182  reserveRegisterTuples(Reserved, Reg);
183  }
184 
185  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
186  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
187  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
188  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
189  reserveRegisterTuples(Reserved, Reg);
190  }
191 
192  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
193 
194  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
195  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
196  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
197  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
198  }
199 
200  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
201  if (ScratchRSrcReg != AMDGPU::NoRegister) {
202  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
203  // to spill.
204  // TODO: May need to reserve a VGPR if doing LDS spilling.
205  reserveRegisterTuples(Reserved, ScratchRSrcReg);
206  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
207  }
208 
209  // We have to assume the SP is needed in case there are calls in the function,
210  // which is detected after the function is lowered. If we aren't really going
211  // to need SP, don't bother reserving it.
212  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
213 
214  if (StackPtrReg != AMDGPU::NoRegister) {
215  reserveRegisterTuples(Reserved, StackPtrReg);
216  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
217  }
218 
219  unsigned FrameReg = MFI->getFrameOffsetReg();
220  if (FrameReg != AMDGPU::NoRegister) {
221  reserveRegisterTuples(Reserved, FrameReg);
222  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
223  }
224 
225  return Reserved;
226 }
227 
228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
229  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
230  if (Info->isEntryFunction()) {
231  const MachineFrameInfo &MFI = Fn.getFrameInfo();
232  return MFI.hasStackObjects() || MFI.hasCalls();
233  }
234 
235  // May need scavenger for dealing with callee saved registers.
236  return true;
237 }
238 
239 bool SIRegisterInfo::requiresFrameIndexScavenging(
240  const MachineFunction &MF) const {
241  const MachineFrameInfo &MFI = MF.getFrameInfo();
242  if (MFI.hasStackObjects())
243  return true;
244 
245  // May need to deal with callee saved registers.
246  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
247  return !Info->isEntryFunction();
248 }
249 
250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
251  const MachineFunction &MF) const {
252  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
253  // create a virtual register for it during frame index elimination, so the
254  // scavenger is directly needed.
255  return MF.getFrameInfo().hasStackObjects() &&
256  MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
257  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
258 }
259 
260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
261  const MachineFunction &) const {
262  // There are no special dedicated stack or frame pointers.
263  return true;
264 }
265 
266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
267  // This helps catch bugs as verifier errors.
268  return true;
269 }
270 
271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
272  assert(SIInstrInfo::isMUBUF(*MI));
273 
274  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
275  AMDGPU::OpName::offset);
276  return MI->getOperand(OffIdx).getImm();
277 }
278 
279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
280  int Idx) const {
281  if (!SIInstrInfo::isMUBUF(*MI))
282  return 0;
283 
284  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
285  AMDGPU::OpName::vaddr) &&
286  "Should never see frame index on non-address operand");
287 
288  return getMUBUFInstrOffset(MI);
289 }
290 
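// MUBUF scratch accesses encode a 12-bit unsigned immediate offset, so a frame
// base register is only needed when the combined offset no longer fits in
// 12 bits (i.e. is >= 4096).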
291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
292  if (!MI->mayLoadOrStore())
293  return false;
294 
295  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
296 
297  return !isUInt<12>(FullOffset);
298 }
299 
300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
301  unsigned BaseReg,
302  int FrameIdx,
303  int64_t Offset) const {
304  MachineBasicBlock::iterator Ins = MBB->begin();
305  DebugLoc DL; // Defaults to "unknown"
306 
307  if (Ins != MBB->end())
308  DL = Ins->getDebugLoc();
309 
310  MachineFunction *MF = MBB->getParent();
311  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
312  const SIInstrInfo *TII = Subtarget.getInstrInfo();
313 
314  if (Offset == 0) {
315  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
316  .addFrameIndex(FrameIdx);
317  return;
318  }
319 
320  MachineRegisterInfo &MRI = MF->getRegInfo();
321  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
322 
323  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
324 
325  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
326  .addImm(Offset);
327  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
328  .addFrameIndex(FrameIdx);
329 
330  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
331  .addReg(OffsetReg, RegState::Kill)
332  .addReg(FIReg);
333 }
334 
335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
336  int64_t Offset) const {
337 
338  MachineBasicBlock *MBB = MI.getParent();
339  MachineFunction *MF = MBB->getParent();
340  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
341  const SIInstrInfo *TII = Subtarget.getInstrInfo();
342 
343 #ifndef NDEBUG
344  // FIXME: Is it possible to be storing a frame index to itself?
345  bool SeenFI = false;
346  for (const MachineOperand &MO: MI.operands()) {
347  if (MO.isFI()) {
348  if (SeenFI)
349  llvm_unreachable("should not see multiple frame indices");
350 
351  SeenFI = true;
352  }
353  }
354 #endif
355 
356  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
357  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
358  assert(TII->isMUBUF(MI));
359  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
360  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
361  "should only be seeing frame offset relative FrameIndex");
362 
363 
364  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
365  int64_t NewOffset = OffsetOp->getImm() + Offset;
366  assert(isUInt<12>(NewOffset) && "offset should be legal");
367 
368  FIOp->ChangeToRegister(BaseReg, false);
369  OffsetOp->setImm(NewOffset);
370 }
371 
372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
373  unsigned BaseReg,
374  int64_t Offset) const {
375  if (!SIInstrInfo::isMUBUF(*MI))
376  return false;
377 
378  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
379 
380  return isUInt<12>(NewOffset);
381 }
382 
383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
384  const MachineFunction &MF, unsigned Kind) const {
385  // This is inaccurate. It depends on the instruction and address space. The
386  // only place where we should hit this is for dealing with frame indexes /
387  // private accesses, so this is correct in that case.
388  return &AMDGPU::VGPR_32RegClass;
389 }
390 
391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
392 
393  switch (Op) {
394  case AMDGPU::SI_SPILL_S512_SAVE:
395  case AMDGPU::SI_SPILL_S512_RESTORE:
396  case AMDGPU::SI_SPILL_V512_SAVE:
397  case AMDGPU::SI_SPILL_V512_RESTORE:
398  return 16;
399  case AMDGPU::SI_SPILL_S256_SAVE:
400  case AMDGPU::SI_SPILL_S256_RESTORE:
401  case AMDGPU::SI_SPILL_V256_SAVE:
402  case AMDGPU::SI_SPILL_V256_RESTORE:
403  return 8;
404  case AMDGPU::SI_SPILL_S128_SAVE:
405  case AMDGPU::SI_SPILL_S128_RESTORE:
406  case AMDGPU::SI_SPILL_V128_SAVE:
407  case AMDGPU::SI_SPILL_V128_RESTORE:
408  return 4;
409  case AMDGPU::SI_SPILL_V96_SAVE:
410  case AMDGPU::SI_SPILL_V96_RESTORE:
411  return 3;
412  case AMDGPU::SI_SPILL_S64_SAVE:
413  case AMDGPU::SI_SPILL_S64_RESTORE:
414  case AMDGPU::SI_SPILL_V64_SAVE:
415  case AMDGPU::SI_SPILL_V64_RESTORE:
416  return 2;
417  case AMDGPU::SI_SPILL_S32_SAVE:
418  case AMDGPU::SI_SPILL_S32_RESTORE:
419  case AMDGPU::SI_SPILL_V32_SAVE:
420  case AMDGPU::SI_SPILL_V32_RESTORE:
421  return 1;
422  default: llvm_unreachable("Invalid spill opcode");
423  }
424 }
425 
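// Map an OFFEN (VGPR-addressed) MUBUF store opcode to its OFFSET
// (immediate-only) form, or return -1 if there is no such form.
// getOffsetMUBUFLoad below does the same for loads.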
426 static int getOffsetMUBUFStore(unsigned Opc) {
427  switch (Opc) {
428  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
429  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
430  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
431  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
432  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
433  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
434  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
435  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
436  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
437  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
438  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
439  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
440  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
441  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
442  default:
443  return -1;
444  }
445 }
446 
447 static int getOffsetMUBUFLoad(unsigned Opc) {
448  switch (Opc) {
449  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
450  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
451  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
452  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
453  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
454  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
455  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
456  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
457  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
458  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
459  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
460  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
461  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
462  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
463  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
464  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
465  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
466  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
467  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
468  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
469  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
470  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
471  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
472  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
473  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
474  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
475  default:
476  return -1;
477  }
478 }
479 
480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
481 // need to handle the case where an SGPR may need to be spilled while spilling.
482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
483  MachineFrameInfo &MFI,
484  MachineBasicBlock::iterator MI,
485  int Index,
486  int64_t Offset) {
487  MachineBasicBlock *MBB = MI->getParent();
488  const DebugLoc &DL = MI->getDebugLoc();
489  bool IsStore = MI->mayStore();
490 
491  unsigned Opc = MI->getOpcode();
492  int LoadStoreOp = IsStore ?
493  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
494  if (LoadStoreOp == -1)
495  return false;
496 
497  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
498  MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
499  .add(*Reg)
500  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
501  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
502  .addImm(Offset)
503  .addImm(0) // glc
504  .addImm(0) // slc
505  .addImm(0) // tfe
506  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
507 
508  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
509  AMDGPU::OpName::vdata_in);
510  if (VDataIn)
511  NewMI.add(*VDataIn);
512  return true;
513 }
514 
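// Lower a VGPR spill or reload into a series of 4-byte MUBUF accesses, one per
// 32-bit subregister. If the immediate offset cannot cover the whole range,
// the offset is folded into an SGPR: a scavenged one when available, otherwise
// ScratchOffsetReg is adjusted in place and restored afterwards.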
515 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
516  unsigned LoadStoreOp,
517  int Index,
518  unsigned ValueReg,
519  bool IsKill,
520  unsigned ScratchRsrcReg,
521  unsigned ScratchOffsetReg,
522  int64_t InstOffset,
523  MachineMemOperand *MMO,
524  RegScavenger *RS) const {
525  MachineBasicBlock *MBB = MI->getParent();
526  MachineFunction *MF = MI->getParent()->getParent();
527  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
528  const SIInstrInfo *TII = ST.getInstrInfo();
529  const MachineFrameInfo &MFI = MF->getFrameInfo();
530 
531  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
532  const DebugLoc &DL = MI->getDebugLoc();
533  bool IsStore = Desc.mayStore();
534 
535  bool RanOutOfSGPRs = false;
536  bool Scavenged = false;
537  unsigned SOffset = ScratchOffsetReg;
538 
539  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
540  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
541  unsigned Size = NumSubRegs * 4;
542  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
543  const int64_t OriginalImmOffset = Offset;
544 
545  unsigned Align = MFI.getObjectAlignment(Index);
546  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
547 
548  if (!isUInt<12>(Offset + Size)) {
549  SOffset = AMDGPU::NoRegister;
550 
551  // We don't have access to the register scavenger if this function is called
552  // during PEI::scavengeFrameVirtualRegs().
553  if (RS)
554  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
555 
556  if (SOffset == AMDGPU::NoRegister) {
557  // There are no free SGPRs, and we are in the process of spilling
558  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
559  // on SI/CI, and on VI it remains true until we implement spilling using
560  // scalar stores), we have no way to free up an SGPR. Our solution here is
561  // to add the offset directly to the ScratchOffset register, and then
562  // subtract the offset after the spill to return ScratchOffset to its
563  // original value.
564  RanOutOfSGPRs = true;
565  SOffset = ScratchOffsetReg;
566  } else {
567  Scavenged = true;
568  }
569 
570  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
571  .addReg(ScratchOffsetReg)
572  .addImm(Offset);
573 
574  Offset = 0;
575  }
576 
577  const unsigned EltSize = 4;
578 
579  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
580  unsigned SubReg = NumSubRegs == 1 ?
581  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
582 
583  unsigned SOffsetRegState = 0;
584  unsigned SrcDstRegState = getDefRegState(!IsStore);
585  if (i + 1 == e) {
586  SOffsetRegState |= getKillRegState(Scavenged);
587  // The last implicit use carries the "Kill" flag.
588  SrcDstRegState |= getKillRegState(IsKill);
589  }
590 
591  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
592  MachineMemOperand *NewMMO
593  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
594  EltSize, MinAlign(Align, EltSize * i));
595 
596  auto MIB = BuildMI(*MBB, MI, DL, Desc)
597  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
598  .addReg(ScratchRsrcReg)
599  .addReg(SOffset, SOffsetRegState)
600  .addImm(Offset)
601  .addImm(0) // glc
602  .addImm(0) // slc
603  .addImm(0) // tfe
604  .addMemOperand(NewMMO);
605 
606  if (NumSubRegs > 1)
607  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
608  }
609 
610  if (RanOutOfSGPRs) {
611  // Subtract the offset we added to the ScratchOffset register.
612  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
613  .addReg(ScratchOffsetReg)
614  .addImm(OriginalImmOffset);
615  }
616 }
617 
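// Pick the widest scalar buffer access that evenly divides the spilled
// super-register: 16 bytes (DWORDX4), 8 bytes (DWORDX2), otherwise 4 bytes
// (DWORD). Returns the element size in bytes together with the opcode.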
618 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
619  bool Store) {
620  if (SuperRegSize % 16 == 0) {
621  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
622  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
623  }
624 
625  if (SuperRegSize % 8 == 0) {
626  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
627  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
628  }
629 
630  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
631  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
632 }
633 
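// Spill an SGPR using one of three strategies: scalar stores to scratch memory
// (SMEM) when enabled and supported, V_WRITELANE_B32 into lanes of a reserved
// VGPR, or, as a last resort, a real stack slot reached through a temporary
// VGPR.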
634 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
635  int Index,
636  RegScavenger *RS,
637  bool OnlyToVGPR) const {
638  MachineBasicBlock *MBB = MI->getParent();
639  MachineFunction *MF = MBB->getParent();
640  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
641  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
642 
643  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
644  = MFI->getSGPRToVGPRSpills(Index);
645  bool SpillToVGPR = !VGPRSpills.empty();
646  if (OnlyToVGPR && !SpillToVGPR)
647  return false;
648 
649  MachineRegisterInfo &MRI = MF->getRegInfo();
650  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
651  const SIInstrInfo *TII = ST.getInstrInfo();
652 
653  unsigned SuperReg = MI->getOperand(0).getReg();
654  bool IsKill = MI->getOperand(0).isKill();
655  const DebugLoc &DL = MI->getDebugLoc();
656 
657  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
658 
659  bool SpillToSMEM = spillSGPRToSMEM();
660  if (SpillToSMEM && OnlyToVGPR)
661  return false;
662 
663  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
664  SuperReg != MFI->getFrameOffsetReg() &&
665  SuperReg != MFI->getScratchWaveOffsetReg()));
666 
667  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
668 
669  unsigned OffsetReg = AMDGPU::M0;
670  unsigned M0CopyReg = AMDGPU::NoRegister;
671 
672  if (SpillToSMEM) {
673  if (RS->isRegUsed(AMDGPU::M0)) {
674  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
675  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
676  .addReg(AMDGPU::M0);
677  }
678  }
679 
680  unsigned ScalarStoreOp;
681  unsigned EltSize = 4;
682  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
683  if (SpillToSMEM && isSGPRClass(RC)) {
684  // XXX - if private_element_size is larger than 4 it might be useful to be
685  // able to spill wider vmem spills.
686  std::tie(EltSize, ScalarStoreOp) =
687  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
688  }
689 
690  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
691  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
692 
693  // SubReg carries the "Kill" flag when SubReg == SuperReg.
694  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
695  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
696  unsigned SubReg = NumSubRegs == 1 ?
697  SuperReg : getSubReg(SuperReg, SplitParts[i]);
698 
699  if (SpillToSMEM) {
700  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
701 
702  // The allocated memory size is really the wavefront size * the frame
703  // index size. The widest register class is 64 bytes, so a 4-byte scratch
704  // allocation is enough to spill this in a single stack object.
705  //
706  // FIXME: Frame size/offsets are computed earlier than this, so the extra
707  // space is still unnecessarily allocated.
708 
709  unsigned Align = FrameInfo.getObjectAlignment(Index);
710  MachinePointerInfo PtrInfo
711  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
712  MachineMemOperand *MMO
713  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
714  EltSize, MinAlign(Align, EltSize * i));
715 
716  // SMEM instructions only support a single offset, so increment the wave
717  // offset.
718 
719  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
720  if (Offset != 0) {
721  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
722  .addReg(MFI->getFrameOffsetReg())
723  .addImm(Offset);
724  } else {
725  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
726  .addReg(MFI->getFrameOffsetReg());
727  }
728 
729  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
730  .addReg(SubReg, getKillRegState(IsKill)) // sdata
731  .addReg(MFI->getScratchRSrcReg()) // sbase
732  .addReg(OffsetReg, RegState::Kill) // soff
733  .addImm(0) // glc
734  .addMemOperand(MMO);
735 
736  continue;
737  }
738 
739  if (SpillToVGPR) {
740  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
741 
742  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
743  // only circumstance in which we say it is undefined is when it is the
744  // first spill to this VGPR in the first basic block.
745  bool VGPRDefined = true;
746  if (MBB == &MF->front())
747  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
748 
749  // Mark the "old value of vgpr" input undef only if this is the first sgpr
750  // spill to this specific vgpr in the first basic block.
751  BuildMI(*MBB, MI, DL,
752  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
753  Spill.VGPR)
754  .addReg(SubReg, getKillRegState(IsKill))
755  .addImm(Spill.Lane)
756  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
757 
758  // FIXME: Since this spills to another register instead of an actual
759  // frame index, we should delete the frame index when all references to
760  // it are fixed.
761  } else {
762  // XXX - Can the to-VGPR spill fail for some subregisters but not others?
763  if (OnlyToVGPR)
764  return false;
765 
766  // Spill SGPR to a frame index.
767  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
768  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
770 
771  MachineInstrBuilder Mov
772  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
773  .addReg(SubReg, SubKillState);
774 
775 
776  // There could be undef components of a spilled super register.
777  // TODO: Can we detect this and skip the spill?
778  if (NumSubRegs > 1) {
779  // The last implicit use of the SuperReg carries the "Kill" flag.
780  unsigned SuperKillState = 0;
781  if (i + 1 == e)
782  SuperKillState |= getKillRegState(IsKill);
783  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
784  }
785 
786  unsigned Align = FrameInfo.getObjectAlignment(Index);
787  MachinePointerInfo PtrInfo
788  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
789  MachineMemOperand *MMO
790  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
791  EltSize, MinAlign(Align, EltSize * i));
792  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
793  .addReg(TmpReg, RegState::Kill) // src
794  .addFrameIndex(Index) // vaddr
795  .addReg(MFI->getScratchRSrcReg()) // srsrc
796  .addReg(MFI->getFrameOffsetReg()) // soffset
797  .addImm(i * 4) // offset
798  .addMemOperand(MMO);
799  }
800  }
801 
802  if (M0CopyReg != AMDGPU::NoRegister) {
803  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
804  .addReg(M0CopyReg, RegState::Kill);
805  }
806 
807  MI->eraseFromParent();
808  MFI->addToSpilledSGPRs(NumSubRegs);
809  return true;
810 }
811 
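// Mirror of spillSGPR: reload with scalar buffer loads (SMEM), with
// V_READLANE_B32 from the spill VGPR, or from the stack slot through a
// temporary VGPR and V_READFIRSTLANE_B32.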
812 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
813  int Index,
814  RegScavenger *RS,
815  bool OnlyToVGPR) const {
816  MachineFunction *MF = MI->getParent()->getParent();
817  MachineRegisterInfo &MRI = MF->getRegInfo();
818  MachineBasicBlock *MBB = MI->getParent();
819  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
820 
821  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
822  = MFI->getSGPRToVGPRSpills(Index);
823  bool SpillToVGPR = !VGPRSpills.empty();
824  if (OnlyToVGPR && !SpillToVGPR)
825  return false;
826 
827  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
828  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
829  const SIInstrInfo *TII = ST.getInstrInfo();
830  const DebugLoc &DL = MI->getDebugLoc();
831 
832  unsigned SuperReg = MI->getOperand(0).getReg();
833  bool SpillToSMEM = spillSGPRToSMEM();
834  if (SpillToSMEM && OnlyToVGPR)
835  return false;
836 
837  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
838 
839  unsigned OffsetReg = AMDGPU::M0;
840  unsigned M0CopyReg = AMDGPU::NoRegister;
841 
842  if (SpillToSMEM) {
843  if (RS->isRegUsed(AMDGPU::M0)) {
844  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
845  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
846  .addReg(AMDGPU::M0);
847  }
848  }
849 
850  unsigned EltSize = 4;
851  unsigned ScalarLoadOp;
852 
853  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
854  if (SpillToSMEM && isSGPRClass(RC)) {
855  // XXX - if private_element_size is larger than 4 it might be useful to be
856  // able to spill wider vmem spills.
857  std::tie(EltSize, ScalarLoadOp) =
858  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
859  }
860 
861  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
862  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
863 
864  // SubReg carries the "Kill" flag when SubReg == SuperReg.
865  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
866 
867  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
868  unsigned SubReg = NumSubRegs == 1 ?
869  SuperReg : getSubReg(SuperReg, SplitParts[i]);
870 
871  if (SpillToSMEM) {
872  // FIXME: Size may be > 4 but extra bytes wasted.
873  unsigned Align = FrameInfo.getObjectAlignment(Index);
874  MachinePointerInfo PtrInfo
875  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
876  MachineMemOperand *MMO
877  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
878  EltSize, MinAlign(Align, EltSize * i));
879 
880  // Add i * 4 offset
881  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
882  if (Offset != 0) {
883  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
884  .addReg(MFI->getFrameOffsetReg())
885  .addImm(Offset);
886  } else {
887  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
888  .addReg(MFI->getFrameOffsetReg());
889  }
890 
891  auto MIB =
892  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
893  .addReg(MFI->getScratchRSrcReg()) // sbase
894  .addReg(OffsetReg, RegState::Kill) // soff
895  .addImm(0) // glc
896  .addMemOperand(MMO);
897 
898  if (NumSubRegs > 1)
899  MIB.addReg(SuperReg, RegState::ImplicitDefine);
900 
901  continue;
902  }
903 
904  if (SpillToVGPR) {
905  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
906  auto MIB =
907  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
908  SubReg)
909  .addReg(Spill.VGPR)
910  .addImm(Spill.Lane);
911 
912  if (NumSubRegs > 1)
913  MIB.addReg(SuperReg, RegState::ImplicitDefine);
914  } else {
915  if (OnlyToVGPR)
916  return false;
917 
918  // Restore SGPR from a stack slot.
919  // FIXME: We should use S_LOAD_DWORD here for VI.
920  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
921  unsigned Align = FrameInfo.getObjectAlignment(Index);
922 
923  MachinePointerInfo PtrInfo
924  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
925 
926  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
927  MachineMemOperand::MOLoad, EltSize,
928  MinAlign(Align, EltSize * i));
929 
930  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
931  .addFrameIndex(Index) // vaddr
932  .addReg(MFI->getScratchRSrcReg()) // srsrc
933  .addReg(MFI->getFrameOffsetReg()) // soffset
934  .addImm(i * 4) // offset
935  .addMemOperand(MMO);
936 
937  auto MIB =
938  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
939  .addReg(TmpReg, RegState::Kill);
940 
941  if (NumSubRegs > 1)
942  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
943  }
944  }
945 
946  if (M0CopyReg != AMDGPU::NoRegister) {
947  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
948  .addReg(M0CopyReg, RegState::Kill);
949  }
950 
951  MI->eraseFromParent();
952  return true;
953 }
954 
955 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
956 /// a VGPR and the stack slot can be safely eliminated when all other users are
957 /// handled.
958 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
959  MachineBasicBlock::iterator MI,
960  int FI,
961  RegScavenger *RS) const {
962  switch (MI->getOpcode()) {
963  case AMDGPU::SI_SPILL_S512_SAVE:
964  case AMDGPU::SI_SPILL_S256_SAVE:
965  case AMDGPU::SI_SPILL_S128_SAVE:
966  case AMDGPU::SI_SPILL_S64_SAVE:
967  case AMDGPU::SI_SPILL_S32_SAVE:
968  return spillSGPR(MI, FI, RS, true);
969  case AMDGPU::SI_SPILL_S512_RESTORE:
970  case AMDGPU::SI_SPILL_S256_RESTORE:
971  case AMDGPU::SI_SPILL_S128_RESTORE:
972  case AMDGPU::SI_SPILL_S64_RESTORE:
973  case AMDGPU::SI_SPILL_S32_RESTORE:
974  return restoreSGPR(MI, FI, RS, true);
975  default:
976  llvm_unreachable("not an SGPR spill instruction");
977  }
978 }
979 
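// Replace the spill/restore pseudo instructions with real MUBUF accesses and
// rewrite any remaining frame-index operands into either a register holding
// the computed address or a plain immediate offset.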
980 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
981  int SPAdj, unsigned FIOperandNum,
982  RegScavenger *RS) const {
983  MachineFunction *MF = MI->getParent()->getParent();
984  MachineRegisterInfo &MRI = MF->getRegInfo();
985  MachineBasicBlock *MBB = MI->getParent();
986  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
987  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
988  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
989  const SIInstrInfo *TII = ST.getInstrInfo();
990  DebugLoc DL = MI->getDebugLoc();
991 
992  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
993  int Index = MI->getOperand(FIOperandNum).getIndex();
994 
995  switch (MI->getOpcode()) {
996  // SGPR register spill
997  case AMDGPU::SI_SPILL_S512_SAVE:
998  case AMDGPU::SI_SPILL_S256_SAVE:
999  case AMDGPU::SI_SPILL_S128_SAVE:
1000  case AMDGPU::SI_SPILL_S64_SAVE:
1001  case AMDGPU::SI_SPILL_S32_SAVE: {
1002  spillSGPR(MI, Index, RS);
1003  break;
1004  }
1005 
1006  // SGPR register restore
1007  case AMDGPU::SI_SPILL_S512_RESTORE:
1008  case AMDGPU::SI_SPILL_S256_RESTORE:
1009  case AMDGPU::SI_SPILL_S128_RESTORE:
1010  case AMDGPU::SI_SPILL_S64_RESTORE:
1011  case AMDGPU::SI_SPILL_S32_RESTORE: {
1012  restoreSGPR(MI, Index, RS);
1013  break;
1014  }
1015 
1016  // VGPR register spill
1017  case AMDGPU::SI_SPILL_V512_SAVE:
1018  case AMDGPU::SI_SPILL_V256_SAVE:
1019  case AMDGPU::SI_SPILL_V128_SAVE:
1020  case AMDGPU::SI_SPILL_V96_SAVE:
1021  case AMDGPU::SI_SPILL_V64_SAVE:
1022  case AMDGPU::SI_SPILL_V32_SAVE: {
1023  const MachineOperand *VData = TII->getNamedOperand(*MI,
1024  AMDGPU::OpName::vdata);
1025  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1026  Index,
1027  VData->getReg(), VData->isKill(),
1028  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1029  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1030  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1031  *MI->memoperands_begin(),
1032  RS);
1033  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1034  MI->eraseFromParent();
1035  break;
1036  }
1037  case AMDGPU::SI_SPILL_V32_RESTORE:
1038  case AMDGPU::SI_SPILL_V64_RESTORE:
1039  case AMDGPU::SI_SPILL_V96_RESTORE:
1040  case AMDGPU::SI_SPILL_V128_RESTORE:
1041  case AMDGPU::SI_SPILL_V256_RESTORE:
1042  case AMDGPU::SI_SPILL_V512_RESTORE: {
1043  const MachineOperand *VData = TII->getNamedOperand(*MI,
1044  AMDGPU::OpName::vdata);
1045 
1046  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1047  Index,
1048  VData->getReg(), VData->isKill(),
1049  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1050  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1051  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1052  *MI->memoperands_begin(),
1053  RS);
1054  MI->eraseFromParent();
1055  break;
1056  }
1057 
1058  default: {
1059  const DebugLoc &DL = MI->getDebugLoc();
1060  bool IsMUBUF = TII->isMUBUF(*MI);
1061 
1062  if (!IsMUBUF &&
1063  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1064  // Convert to an absolute stack address by finding the offset from the
1065  // scratch wave base and scaling by the wave size.
1066  //
1067  // In an entry function/kernel the stack address is already the
1068  // absolute address relative to the scratch wave offset.
1069 
1070  unsigned DiffReg
1071  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1072 
1073  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1074  unsigned ResultReg = IsCopy ?
1075  MI->getOperand(0).getReg() :
1076  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1077 
1078  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1079  .addReg(MFI->getFrameOffsetReg())
1080  .addReg(MFI->getScratchWaveOffsetReg());
1081 
1082  int64_t Offset = FrameInfo.getObjectOffset(Index);
1083  if (Offset == 0) {
1084  // XXX - This never happens because of emergency scavenging slot at 0?
1085  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1086  .addImm(Log2_32(ST.getWavefrontSize()))
1087  .addReg(DiffReg);
1088  } else {
1089  unsigned ScaledReg
1090  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1091 
1092  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1093  .addImm(Log2_32(ST.getWavefrontSize()))
1094  .addReg(DiffReg, RegState::Kill);
1095 
1096  // TODO: Fold if use instruction is another add of a constant.
1097  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1098  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1099  .addImm(Offset)
1100  .addReg(ScaledReg, RegState::Kill);
1101  } else {
1102  unsigned ConstOffsetReg
1103  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1104 
1105  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1106  .addImm(Offset);
1107  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1108  .addReg(ConstOffsetReg, RegState::Kill)
1109  .addReg(ScaledReg, RegState::Kill);
1110  }
1111  }
1112 
1113  // Don't introduce an extra copy if we're just materializing in a mov.
1114  if (IsCopy)
1115  MI->eraseFromParent();
1116  else
1117  FIOp.ChangeToRegister(ResultReg, false, false, true);
1118  return;
1119  }
1120 
1121  if (IsMUBUF) {
1122  // Disable offen so we don't need a 0 vgpr base.
1123  assert(static_cast<int>(FIOperandNum) ==
1124  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1125  AMDGPU::OpName::vaddr));
1126 
1127  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1128  == MFI->getFrameOffsetReg());
1129 
1130  int64_t Offset = FrameInfo.getObjectOffset(Index);
1131  int64_t OldImm
1132  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1133  int64_t NewOffset = OldImm + Offset;
1134 
1135  if (isUInt<12>(NewOffset) &&
1136  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1137  MI->eraseFromParent();
1138  return;
1139  }
1140  }
1141 
1142  // If the offset is simply too big, don't convert to a scratch wave offset
1143  // relative index.
1144 
1145  int64_t Offset = FrameInfo.getObjectOffset(Index);
1146  FIOp.ChangeToImmediate(Offset);
1147  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1148  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1149  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1150  .addImm(Offset);
1151  FIOp.ChangeToRegister(TmpReg, false, false, true);
1152  }
1153  }
1154  }
1155 }
1156 
1157 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1158  #define AMDGPU_REG_ASM_NAMES
1159  #include "AMDGPURegAsmNames.inc.cpp"
1160 
1161  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1162  if (Reg >= BeginReg && Reg <= EndReg) { \
1163  unsigned Index = Reg - BeginReg; \
1164  assert(Index < array_lengthof(RegTable)); \
1165  return RegTable[Index]; \
1166  }
1167 
1168  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1169  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1170  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1171  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1172  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1173  VGPR96RegNames);
1174 
1175  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1176  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1177  VGPR128RegNames);
1178  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1179  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1180  SGPR128RegNames);
1181 
1182  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1183  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1184  VGPR256RegNames);
1185 
1186  REG_RANGE(
1187  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1188  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1189  VGPR512RegNames);
1190 
1191  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1192  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1193  SGPR256RegNames);
1194 
1195  REG_RANGE(
1196  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1197  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1198  SGPR512RegNames
1199  );
1200 
1201 #undef REG_RANGE
1202 
1203  // FIXME: Rename flat_scr so we don't need to special case this.
1204  switch (Reg) {
1205  case AMDGPU::FLAT_SCR:
1206  return "flat_scratch";
1207  case AMDGPU::FLAT_SCR_LO:
1208  return "flat_scratch_lo";
1209  case AMDGPU::FLAT_SCR_HI:
1210  return "flat_scratch_hi";
1211  default:
1212  // For the special named registers the default is fine.
1213  return TargetRegisterInfo::getRegAsmName(Reg);
1214  }
1215 }
1216 
1217 // FIXME: This is very slow. It might be worth creating a map from physreg to
1218 // register class.
1219 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1220  assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1221 
1222  static const TargetRegisterClass *const BaseClasses[] = {
1223  &AMDGPU::VGPR_32RegClass,
1224  &AMDGPU::SReg_32RegClass,
1225  &AMDGPU::VReg_64RegClass,
1226  &AMDGPU::SReg_64RegClass,
1227  &AMDGPU::VReg_96RegClass,
1228  &AMDGPU::VReg_128RegClass,
1229  &AMDGPU::SReg_128RegClass,
1230  &AMDGPU::VReg_256RegClass,
1231  &AMDGPU::SReg_256RegClass,
1232  &AMDGPU::VReg_512RegClass,
1233  &AMDGPU::SReg_512RegClass,
1234  &AMDGPU::SCC_CLASSRegClass,
1235  &AMDGPU::Pseudo_SReg_32RegClass,
1236  &AMDGPU::Pseudo_SReg_128RegClass,
1237  };
1238 
1239  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1240  if (BaseClass->contains(Reg)) {
1241  return BaseClass;
1242  }
1243  }
1244  return nullptr;
1245 }
1246 
1247 // TODO: It might be helpful to have some target specific flags in
1248 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1249 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1250  unsigned Size = getRegSizeInBits(*RC);
1251  if (Size < 32)
1252  return false;
1253  switch (Size) {
1254  case 32:
1255  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1256  case 64:
1257  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1258  case 96:
1259  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1260  case 128:
1261  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1262  case 256:
1263  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1264  case 512:
1265  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1266  default:
1267  llvm_unreachable("Invalid register class size");
1268  }
1269 }
1270 
1271 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1272  const TargetRegisterClass *SRC) const {
1273  switch (getRegSizeInBits(*SRC)) {
1274  case 32:
1275  return &AMDGPU::VGPR_32RegClass;
1276  case 64:
1277  return &AMDGPU::VReg_64RegClass;
1278  case 96:
1279  return &AMDGPU::VReg_96RegClass;
1280  case 128:
1281  return &AMDGPU::VReg_128RegClass;
1282  case 256:
1283  return &AMDGPU::VReg_256RegClass;
1284  case 512:
1285  return &AMDGPU::VReg_512RegClass;
1286  default:
1287  llvm_unreachable("Invalid register class size");
1288  }
1289 }
1290 
1291 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1292  const TargetRegisterClass *VRC) const {
1293  switch (getRegSizeInBits(*VRC)) {
1294  case 32:
1295  return &AMDGPU::SGPR_32RegClass;
1296  case 64:
1297  return &AMDGPU::SReg_64RegClass;
1298  case 128:
1299  return &AMDGPU::SReg_128RegClass;
1300  case 256:
1301  return &AMDGPU::SReg_256RegClass;
1302  case 512:
1303  return &AMDGPU::SReg_512RegClass;
1304  default:
1305  llvm_unreachable("Invalid register class size");
1306  }
1307 }
1308 
1309 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1310  const TargetRegisterClass *RC, unsigned SubIdx) const {
1311  if (SubIdx == AMDGPU::NoSubRegister)
1312  return RC;
1313 
1314  // We can assume that each lane corresponds to one 32-bit register.
1315  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1316  if (isSGPRClass(RC)) {
1317  switch (Count) {
1318  case 1:
1319  return &AMDGPU::SGPR_32RegClass;
1320  case 2:
1321  return &AMDGPU::SReg_64RegClass;
1322  case 4:
1323  return &AMDGPU::SReg_128RegClass;
1324  case 8:
1325  return &AMDGPU::SReg_256RegClass;
1326  case 16: /* fall-through */
1327  default:
1328  llvm_unreachable("Invalid sub-register class size");
1329  }
1330  } else {
1331  switch (Count) {
1332  case 1:
1333  return &AMDGPU::VGPR_32RegClass;
1334  case 2:
1335  return &AMDGPU::VReg_64RegClass;
1336  case 3:
1337  return &AMDGPU::VReg_96RegClass;
1338  case 4:
1339  return &AMDGPU::VReg_128RegClass;
1340  case 8:
1341  return &AMDGPU::VReg_256RegClass;
1342  case 16: /* fall-through */
1343  default:
1344  llvm_unreachable("Invalid sub-register class size");
1345  }
1346  }
1347 }
1348 
1349 bool SIRegisterInfo::shouldRewriteCopySrc(
1350  const TargetRegisterClass *DefRC,
1351  unsigned DefSubReg,
1352  const TargetRegisterClass *SrcRC,
1353  unsigned SrcSubReg) const {
1354  // We want to prefer the smallest register class possible, so we don't want to
1355  // stop and rewrite on anything that looks like a subregister
1356  // extract. Operations mostly don't care about the super register class, so we
1357  // only want to stop on the most basic of copies between the same register
1358  // class.
1359  //
1360  // e.g. if we have something like
1361  // %0 = ...
1362  // %1 = ...
1363  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1364  // %3 = COPY %2, sub0
1365  //
1366  // We want to look through the COPY to find:
1367  // => %3 = COPY %0
1368 
1369  // Plain copy.
1370  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1371 }
1372 
1373 /// Returns a register that is not used at any point in the function.
1374 /// If all registers are used, then this function will return
1375 /// AMDGPU::NoRegister.
1376 unsigned
1377 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1378  const TargetRegisterClass *RC,
1379  const MachineFunction &MF) const {
1380 
1381  for (unsigned Reg : *RC)
1382  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1383  return Reg;
1384  return AMDGPU::NoRegister;
1385 }
1386 
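// Return the sub-register indices used to split a register of class RC into
// EltSize-byte (4, 8 or 16) pieces for spilling; an empty list means the
// register is spilled as a single element.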
1387 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1388  unsigned EltSize) const {
1389  if (EltSize == 4) {
1390  static const int16_t Sub0_15[] = {
1391  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1392  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1393  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1394  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1395  };
1396 
1397  static const int16_t Sub0_7[] = {
1398  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1399  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1400  };
1401 
1402  static const int16_t Sub0_3[] = {
1403  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1404  };
1405 
1406  static const int16_t Sub0_2[] = {
1407  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1408  };
1409 
1410  static const int16_t Sub0_1[] = {
1411  AMDGPU::sub0, AMDGPU::sub1,
1412  };
1413 
1414  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1415  case 32:
1416  return {};
1417  case 64:
1418  return makeArrayRef(Sub0_1);
1419  case 96:
1420  return makeArrayRef(Sub0_2);
1421  case 128:
1422  return makeArrayRef(Sub0_3);
1423  case 256:
1424  return makeArrayRef(Sub0_7);
1425  case 512:
1426  return makeArrayRef(Sub0_15);
1427  default:
1428  llvm_unreachable("unhandled register size");
1429  }
1430  }
1431 
1432  if (EltSize == 8) {
1433  static const int16_t Sub0_15_64[] = {
1434  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1435  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1436  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1437  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1438  };
1439 
1440  static const int16_t Sub0_7_64[] = {
1441  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1442  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1443  };
1444 
1445 
1446  static const int16_t Sub0_3_64[] = {
1447  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1448  };
1449 
1450  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1451  case 64:
1452  return {};
1453  case 128:
1454  return makeArrayRef(Sub0_3_64);
1455  case 256:
1456  return makeArrayRef(Sub0_7_64);
1457  case 512:
1458  return makeArrayRef(Sub0_15_64);
1459  default:
1460  llvm_unreachable("unhandled register size");
1461  }
1462  }
1463 
1464  assert(EltSize == 16 && "unhandled register spill split size");
1465 
1466  static const int16_t Sub0_15_128[] = {
1467  AMDGPU::sub0_sub1_sub2_sub3,
1468  AMDGPU::sub4_sub5_sub6_sub7,
1469  AMDGPU::sub8_sub9_sub10_sub11,
1470  AMDGPU::sub12_sub13_sub14_sub15
1471  };
1472 
1473  static const int16_t Sub0_7_128[] = {
1474  AMDGPU::sub0_sub1_sub2_sub3,
1475  AMDGPU::sub4_sub5_sub6_sub7
1476  };
1477 
1478  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1479  case 128:
1480  return {};
1481  case 256:
1482  return makeArrayRef(Sub0_7_128);
1483  case 512:
1484  return makeArrayRef(Sub0_15_128);
1485  default:
1486  llvm_unreachable("unhandled register size");
1487  }
1488 }
1489 
1490 const TargetRegisterClass*
1491 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1492  unsigned Reg) const {
1493  if (TargetRegisterInfo::isVirtualRegister(Reg))
1494  return MRI.getRegClass(Reg);
1495 
1496  return getPhysRegClass(Reg);
1497 }
1498 
1499 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1500  unsigned Reg) const {
1501  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1502  assert(RC && "Register class for the reg not found");
1503  return hasVGPRs(RC);
1504 }
1505 
1506 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1507  const TargetRegisterClass *SrcRC,
1508  unsigned SubReg,
1509  const TargetRegisterClass *DstRC,
1510  unsigned DstSubReg,
1511  const TargetRegisterClass *NewRC,
1512  LiveIntervals &LIS) const {
1513  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1514  unsigned DstSize = getRegSizeInBits(*DstRC);
1515  unsigned NewSize = getRegSizeInBits(*NewRC);
1516 
1517  // Do not increase size of registers beyond dword, we would need to allocate
1518  // adjacent registers and constrain regalloc more than needed.
1519 
1520  // Always allow dword coalescing.
1521  if (SrcSize <= 32 || DstSize <= 32)
1522  return true;
1523 
1524  return NewSize <= DstSize || NewSize <= SrcSize;
1525 }
1526 
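// The VGPR/SGPR pressure limits are capped both by the per-function register
// budget and by the number of registers still compatible with the occupancy
// implied by this function's LDS usage.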
1527 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1528  MachineFunction &MF) const {
1529 
1530  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1531  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1532 
1533  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1534  MF.getFunction());
1535  switch (RC->getID()) {
1536  default:
1537  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1538  case AMDGPU::VGPR_32RegClassID:
1539  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1540  case AMDGPU::SGPR_32RegClassID:
1541  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1542  }
1543 }
1544 
1545 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1546  unsigned Idx) const {
1547  if (Idx == getVGPRPressureSet())
1548  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1549  const_cast<MachineFunction &>(MF));
1550 
1551  if (Idx == getSGPRPressureSet())
1552  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1553  const_cast<MachineFunction &>(MF));
1554 
1555  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1556 }
1557 
1558 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1559  static const int Empty[] = { -1 };
1560 
1561  if (hasRegUnit(AMDGPU::M0, RegUnit))
1562  return Empty;
1563  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1564 }
1565 
1566 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1567  // Not a callee saved register.
1568  return AMDGPU::SGPR30_SGPR31;
1569 }
1570 
1571 const TargetRegisterClass *
1572 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1573  const MachineRegisterInfo &MRI) const {
1574  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1575  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1576  if (!RB)
1577  return nullptr;
1578 
1579  switch (Size) {
1580  case 32:
1581  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1582  &AMDGPU::SReg_32_XM0RegClass;
1583  case 64:
1584  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1585  &AMDGPU::SReg_64_XEXECRegClass;
1586  case 96:
1587  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1588  nullptr;
1589  case 128:
1590  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1591  &AMDGPU::SReg_128RegClass;
1592  default:
1593  llvm_unreachable("not implemented");
1594  }
1595 }
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const override
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:398
Interface definition for SIRegisterInfo.
static unsigned getSubRegFromChannel(unsigned Channel)
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
AMDGPU specific subclass of TargetSubtarget.
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override
bool hasScalarStores() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
bool isRegUsed(unsigned Reg, bool includeReserved=true) const
Return if a specific register is currently used.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
SIRegisterInfo(const GCNSubtarget &ST)
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override
static int getOffsetMUBUFLoad(unsigned Opc)
StringRef getRegAsmName(unsigned Reg) const override
bool hasStackObjects() const
Return true if there are any stack objects in this function.
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:162
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static std::pair< unsigned, unsigned > getSpillEltSize(unsigned SuperRegSize, bool Store)
unsigned Reg
bool hasInv2PiInlineImm() const
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
Definition: MachineInstr.h:682
const SIInstrInfo * getInstrInfo() const override
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount)
A debug info location.
Definition: DebugLoc.h:34
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override
static unsigned getNumSubRegsForSpillOp(unsigned Op)
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:718
A description of a memory reference used in the backend.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
unsigned SubReg
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:311
unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:404
unsigned FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isSGPRPressureSet(unsigned SetID) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if this...
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
#define REG_RANGE(BeginReg, EndReg, RegTable)
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets. Return the minimum alignment that may be assumed after adding the two together.
Definition: MathExtras.h:610
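A small standalone sketch of the idea behind MinAlign (a re-implementation for illustration, not the MathExtras.h code; minAlignSketch and the test values are invented): the result is the largest power of two dividing both inputs, i.e. the alignment that survives when an offset B is added to a pointer aligned to A.
  #include <cassert>
  #include <cstdint>

  // Isolate the lowest set bit of A | B: the largest power of two that
  // divides both A and B. Illustration only.
  static constexpr uint64_t minAlignSketch(uint64_t A, uint64_t B) {
    return (A | B) & (~(A | B) + 1);
  }

  int main() {
    assert(minAlignSketch(16, 4) == 4);  // 4-byte offset into 16-byte aligned memory
    assert(minAlignSketch(16, 6) == 2);  // 6 = 2 * 3, so only 2-byte alignment remains
    assert(minAlignSketch(8, 8) == 8);
    return 0;
  }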
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
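As a rough illustration of how BuildMI combines with addReg and getKillRegState (both listed in this index) to emit an instruction, here is a minimal sketch. It assumes the AMDGPU backend build environment; emitVMovSketch, its parameters, and the choice of V_MOV_B32_e32 are invented for the example and are not code from this file, and it uses the BuildMI overload that takes a basic block and insertion point rather than the one shown above.
  #include "SIInstrInfo.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"

  using namespace llvm;

  // Hypothetical helper: emit "v_mov_b32 DstReg, SrcReg" before MI,
  // showing the BuildMI / addReg / getKillRegState pattern.
  static void emitVMovSketch(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                             const SIInstrInfo *TII, unsigned DstReg,
                             unsigned SrcReg, bool SrcIsKill) {
    const DebugLoc &DL = MI->getDebugLoc();
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
        .addReg(SrcReg, getKillRegState(SrcIsKill)); // kill flag folded into operand flags
  }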
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
unsigned getReturnAddressReg(const MachineFunction &MF) const
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getSGPRPressureSet() const
const RegisterBank * getRegBankOrNull(unsigned Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
const MachineBasicBlock & front() const
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
This class implements the register bank concept.
Definition: RegisterBank.h:29
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
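A standalone sketch of the documented Log2_32 behaviour (illustration only, not the MathExtras.h implementation; log2_32Sketch is an invented name): floor(log2(Value)), with an all-ones result, i.e. (unsigned)-1, for an input of zero. Backends commonly use this shape of helper to turn a byte alignment into its log2 encoding.
  #include <cassert>
  #include <cstdint>

  static unsigned log2_32Sketch(uint32_t Value) {
    unsigned Result = unsigned(-1); // stays -1 when Value == 0
    while (Value) {
      Value >>= 1;
      ++Result;
    }
    return Result;
  }

  int main() {
    assert(log2_32Sketch(1) == 0);
    assert(log2_32Sketch(4) == 2);
    assert(log2_32Sketch(7) == 2);  // floor, not rounding
    assert(log2_32Sketch(0) == unsigned(-1));
    return 0;
  }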
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:398
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:156
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
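To show how the entries for getFixedStack, getMachineMemOperand, getObjectAlignment, and the MOStore flag ("The memory access writes data.") fit together, here is a hedged sketch of building a store memory operand for a spill slot. It assumes an LLVM CodeGen build environment; makeSpillStoreMMO and the fixed 4-byte size are invented for the example rather than taken from this file.
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineMemOperand.h"

  using namespace llvm;

  // Hypothetical helper: describe a 4-byte store to the spill slot at frame
  // index FI, the kind of MMO typically attached to spill instructions.
  static MachineMemOperand *makeSpillStoreMMO(MachineFunction &MF, int FI) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   /*Size=*/4, MFI.getObjectAlignment(FI));
  }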
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:60
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
void reserveRegisterTuples(BitVector &, unsigned Reg) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value,.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
const unsigned Kind
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
IRTranslator LLVM IR -> MI
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:48
bool hasCalls() const
Return true if the current function has any function calls.