SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
 19 #include "SIMachineFunctionInfo.h"
 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 21 #include "llvm/CodeGen/MachineFrameInfo.h"
 22 #include "llvm/CodeGen/MachineInstrBuilder.h"
 23 #include "llvm/CodeGen/RegisterScavenging.h"
 24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/LLVMContext.h"
26 
27 using namespace llvm;
28 
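// Returns true if the pressure-set list PSets (terminated by -1) contains PSetID.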
29 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
30  for (unsigned i = 0; PSets[i] != -1; ++i) {
31  if (PSets[i] == (int)PSetID)
32  return true;
33  }
34  return false;
35 }
36 
37 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
38  BitVector &PressureSets) const {
39  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
40  const int *PSets = getRegUnitPressureSets(*U);
41  if (hasPressureSet(PSets, PSetID)) {
42  PressureSets.set(PSetID);
43  break;
44  }
45  }
46 }
47 
 48 static cl::opt<bool> EnableSpillSGPRToSMEM(
 49   "amdgpu-spill-sgpr-to-smem",
50  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
51  cl::init(false));
52 
 53 static cl::opt<bool> EnableSpillSGPRToVGPR(
 54   "amdgpu-spill-sgpr-to-vgpr",
 55   cl::desc("Enable spilling SGPRs to VGPRs"),
 56   cl::ReallyHidden,
 57   cl::init(true));
58 
 59 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
 60   AMDGPURegisterInfo(),
 61   SGPRPressureSets(getNumRegPressureSets()),
62  VGPRPressureSets(getNumRegPressureSets()),
63  SpillSGPRToVGPR(false),
64  SpillSGPRToSMEM(false) {
65  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
66  SpillSGPRToSMEM = true;
67  else if (EnableSpillSGPRToVGPR)
68  SpillSGPRToVGPR = true;
69 
70  unsigned NumRegPressureSets = getNumRegPressureSets();
71 
72  SGPRSetID = NumRegPressureSets;
73  VGPRSetID = NumRegPressureSets;
74 
75  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
76  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
77  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
78  }
79 
80  // Determine the number of reg units for each pressure set.
81  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
82  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
83  const int *PSets = getRegUnitPressureSets(i);
84  for (unsigned j = 0; PSets[j] != -1; ++j) {
85  ++PressureSetRegUnits[PSets[j]];
86  }
87  }
88 
89  unsigned VGPRMax = 0, SGPRMax = 0;
90  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
91  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
92  VGPRSetID = i;
93  VGPRMax = PressureSetRegUnits[i];
94  continue;
95  }
96  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
97  SGPRSetID = i;
98  SGPRMax = PressureSetRegUnits[i];
99  }
100  }
101 
102  assert(SGPRSetID < NumRegPressureSets &&
103  VGPRSetID < NumRegPressureSets);
104 }
105 
 106 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
 107   const MachineFunction &MF) const {
108 
109  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
111  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
112  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
113 }
114 
115 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
116  unsigned Reg;
117 
118  // Try to place it in a hole after PrivateSegmentBufferReg.
119  if (RegCount & 3) {
120  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
 121     // alignment constraints, so we have a hole where we can put the wave offset.
122  Reg = RegCount - 1;
123  } else {
124  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
125  // wave offset before it.
126  Reg = RegCount - 5;
127  }
128 
129  return Reg;
130 }
131 
 132 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
 133   const MachineFunction &MF) const {
 134   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 135   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
 136   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
137 }
138 
 139 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
 140   const MachineFunction &MF) const {
141  return AMDGPU::SGPR32;
142 }
143 
 144 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 145   BitVector Reserved(getNumRegs());
146 
 147   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
148  // this seems likely to result in bugs, so I'm marking them as reserved.
149  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
150  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
151 
152  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
153  reserveRegisterTuples(Reserved, AMDGPU::M0);
154 
155  // Reserve the memory aperture registers.
156  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
157  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
158  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
160 
161  // Reserve xnack_mask registers - support is not implemented in Codegen.
162  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
163 
164  // Reserve Trap Handler registers - support is not implemented in Codegen.
165  reserveRegisterTuples(Reserved, AMDGPU::TBA);
166  reserveRegisterTuples(Reserved, AMDGPU::TMA);
167  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
168  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
169  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
170  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
171  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
172  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
173  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
174  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
175 
176  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
177 
178  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
179  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
180  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
181  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
182  reserveRegisterTuples(Reserved, Reg);
183  }
184 
185  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
186  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
187  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
188  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
189  reserveRegisterTuples(Reserved, Reg);
190  }
191 
 192   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 193 
194  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
195  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
196  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
197  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
198  }
199 
200  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
201  if (ScratchRSrcReg != AMDGPU::NoRegister) {
202  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
203  // to spill.
204  // TODO: May need to reserve a VGPR if doing LDS spilling.
205  reserveRegisterTuples(Reserved, ScratchRSrcReg);
206  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
207  }
208 
209  // We have to assume the SP is needed in case there are calls in the function,
210  // which is detected after the function is lowered. If we aren't really going
211  // to need SP, don't bother reserving it.
212  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
213 
214  if (StackPtrReg != AMDGPU::NoRegister) {
215  reserveRegisterTuples(Reserved, StackPtrReg);
216  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
217  }
218 
219  unsigned FrameReg = MFI->getFrameOffsetReg();
220  if (FrameReg != AMDGPU::NoRegister) {
221  reserveRegisterTuples(Reserved, FrameReg);
222  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
223  }
224 
225  return Reserved;
226 }
227 
 228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
 229   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
 230   if (Info->isEntryFunction()) {
231  const MachineFrameInfo &MFI = Fn.getFrameInfo();
232  return MFI.hasStackObjects() || MFI.hasCalls();
233  }
234 
235  // May need scavenger for dealing with callee saved registers.
236  return true;
237 }
238 
 239 bool SIRegisterInfo::requiresFrameIndexScavenging(
 240   const MachineFunction &MF) const {
241  const MachineFrameInfo &MFI = MF.getFrameInfo();
242  if (MFI.hasStackObjects())
243  return true;
244 
245  // May need to deal with callee saved registers.
 246   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 247   return !Info->isEntryFunction();
248 }
249 
 250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
 251   const MachineFunction &MF) const {
252  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
253  // create a virtual register for it during frame index elimination, so the
254  // scavenger is directly needed.
255  return MF.getFrameInfo().hasStackObjects() &&
256  MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
257  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
258 }
259 
 260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
 261   const MachineFunction &) const {
262  // There are no special dedicated stack or frame pointers.
263  return true;
264 }
265 
 266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
 267   // This helps catch bugs as verifier errors.
268  return true;
269 }
270 
 271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
 272   assert(SIInstrInfo::isMUBUF(*MI));
 273 
274  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
275  AMDGPU::OpName::offset);
276  return MI->getOperand(OffIdx).getImm();
277 }
278 
 279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
 280                                                  int Idx) const {
281  if (!SIInstrInfo::isMUBUF(*MI))
282  return 0;
283 
 284   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
 285                                            AMDGPU::OpName::vaddr) &&
286  "Should never see frame index on non-address operand");
287 
288  return getMUBUFInstrOffset(MI);
289 }
290 
 291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
 292   if (!MI->mayLoadOrStore())
293  return false;
294 
295  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
296 
297  return !isUInt<12>(FullOffset);
298 }
299 
 300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 301                                                   unsigned BaseReg,
302  int FrameIdx,
303  int64_t Offset) const {
 304   MachineBasicBlock::iterator Ins = MBB->begin();
 305   DebugLoc DL; // Defaults to "unknown"
306 
307  if (Ins != MBB->end())
308  DL = Ins->getDebugLoc();
309 
310  MachineFunction *MF = MBB->getParent();
311  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
312  const SIInstrInfo *TII = Subtarget.getInstrInfo();
313 
314  if (Offset == 0) {
315  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
316  .addFrameIndex(FrameIdx);
317  return;
318  }
319 
 320   MachineRegisterInfo &MRI = MF->getRegInfo();
 321   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
322 
323  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
324 
325  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
326  .addImm(Offset);
327  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
328  .addFrameIndex(FrameIdx);
329 
330  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
331  .addReg(OffsetReg, RegState::Kill)
332  .addReg(FIReg);
333 }
334 
 335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
 336                                        int64_t Offset) const {
337 
338  MachineBasicBlock *MBB = MI.getParent();
339  MachineFunction *MF = MBB->getParent();
340  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
341  const SIInstrInfo *TII = Subtarget.getInstrInfo();
342 
343 #ifndef NDEBUG
344  // FIXME: Is it possible to be storing a frame index to itself?
345  bool SeenFI = false;
346  for (const MachineOperand &MO: MI.operands()) {
347  if (MO.isFI()) {
348  if (SeenFI)
349  llvm_unreachable("should not see multiple frame indices");
350 
351  SeenFI = true;
352  }
353  }
354 #endif
355 
356  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
357  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
358  assert(TII->isMUBUF(MI));
359  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
360  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
361  "should only be seeing frame offset relative FrameIndex");
362 
363 
364  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
365  int64_t NewOffset = OffsetOp->getImm() + Offset;
366  assert(isUInt<12>(NewOffset) && "offset should be legal");
367 
368  FIOp->ChangeToRegister(BaseReg, false);
369  OffsetOp->setImm(NewOffset);
370 }
371 
 372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
 373                                         unsigned BaseReg,
374  int64_t Offset) const {
375  if (!SIInstrInfo::isMUBUF(*MI))
376  return false;
377 
378  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
379 
380  return isUInt<12>(NewOffset);
381 }
382 
 383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
 384   const MachineFunction &MF, unsigned Kind) const {
385  // This is inaccurate. It depends on the instruction and address space. The
386  // only place where we should hit this is for dealing with frame indexes /
387  // private accesses, so this is correct in that case.
388  return &AMDGPU::VGPR_32RegClass;
389 }
390 
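// Number of 32-bit subregisters covered by a given SGPR/VGPR spill pseudo opcode.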
391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
392 
393  switch (Op) {
394  case AMDGPU::SI_SPILL_S512_SAVE:
395  case AMDGPU::SI_SPILL_S512_RESTORE:
396  case AMDGPU::SI_SPILL_V512_SAVE:
397  case AMDGPU::SI_SPILL_V512_RESTORE:
398  return 16;
399  case AMDGPU::SI_SPILL_S256_SAVE:
400  case AMDGPU::SI_SPILL_S256_RESTORE:
401  case AMDGPU::SI_SPILL_V256_SAVE:
402  case AMDGPU::SI_SPILL_V256_RESTORE:
403  return 8;
404  case AMDGPU::SI_SPILL_S128_SAVE:
405  case AMDGPU::SI_SPILL_S128_RESTORE:
406  case AMDGPU::SI_SPILL_V128_SAVE:
407  case AMDGPU::SI_SPILL_V128_RESTORE:
408  return 4;
409  case AMDGPU::SI_SPILL_V96_SAVE:
410  case AMDGPU::SI_SPILL_V96_RESTORE:
411  return 3;
412  case AMDGPU::SI_SPILL_S64_SAVE:
413  case AMDGPU::SI_SPILL_S64_RESTORE:
414  case AMDGPU::SI_SPILL_V64_SAVE:
415  case AMDGPU::SI_SPILL_V64_RESTORE:
416  return 2;
417  case AMDGPU::SI_SPILL_S32_SAVE:
418  case AMDGPU::SI_SPILL_S32_RESTORE:
419  case AMDGPU::SI_SPILL_V32_SAVE:
420  case AMDGPU::SI_SPILL_V32_RESTORE:
421  return 1;
422  default: llvm_unreachable("Invalid spill opcode");
423  }
424 }
425 
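// Map an OFFEN (VGPR-addressed) MUBUF store opcode to its OFFSET (immediate
// offset only) form, or return -1 if there is no such form.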
426 static int getOffsetMUBUFStore(unsigned Opc) {
427  switch (Opc) {
428  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
429  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
430  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
431  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
432  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
433  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
434  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
435  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
436  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
437  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
438  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
439  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
440  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
441  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
442  default:
443  return -1;
444  }
445 }
446 
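// Same mapping as above, but for MUBUF loads: OFFEN form to OFFSET form, or -1.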
447 static int getOffsetMUBUFLoad(unsigned Opc) {
448  switch (Opc) {
449  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
450  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
451  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
452  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
453  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
454  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
455  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
456  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
457  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
458  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
459  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
460  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
461  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
462  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
463  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
464  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
465  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
466  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
467  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
468  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
469  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
470  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
471  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
472  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
473  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
474  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
475  default:
476  return -1;
477  }
478 }
479 
480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
481 // need to handle the case where an SGPR may need to be spilled while spilling.
 482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
 483                                       MachineFrameInfo &MFI,
 484                                       MachineBasicBlock::iterator MI,
 485                                       int Index,
 486                                       int64_t Offset) {
487  MachineBasicBlock *MBB = MI->getParent();
488  const DebugLoc &DL = MI->getDebugLoc();
489  bool IsStore = MI->mayStore();
490 
491  unsigned Opc = MI->getOpcode();
 492   int LoadStoreOp = IsStore ?
 493     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
 494   if (LoadStoreOp == -1)
495  return false;
496 
497  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
498  MachineInstrBuilder NewMI =
499  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
500  .add(*Reg)
501  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
502  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
503  .addImm(Offset)
504  .addImm(0) // glc
505  .addImm(0) // slc
506  .addImm(0) // tfe
507  .cloneMemRefs(*MI);
508 
509  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
510  AMDGPU::OpName::vdata_in);
511  if (VDataIn)
512  NewMI.add(*VDataIn);
513  return true;
514 }
515 
516 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
517  unsigned LoadStoreOp,
518  int Index,
519  unsigned ValueReg,
520  bool IsKill,
521  unsigned ScratchRsrcReg,
522  unsigned ScratchOffsetReg,
523  int64_t InstOffset,
524  MachineMemOperand *MMO,
525  RegScavenger *RS) const {
526  MachineBasicBlock *MBB = MI->getParent();
527  MachineFunction *MF = MI->getParent()->getParent();
528  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
529  const SIInstrInfo *TII = ST.getInstrInfo();
530  const MachineFrameInfo &MFI = MF->getFrameInfo();
531 
532  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
533  const DebugLoc &DL = MI->getDebugLoc();
534  bool IsStore = Desc.mayStore();
535 
536  bool Scavenged = false;
537  unsigned SOffset = ScratchOffsetReg;
538 
539  const unsigned EltSize = 4;
540  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
541  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
542  unsigned Size = NumSubRegs * EltSize;
543  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
544  int64_t ScratchOffsetRegDelta = 0;
545 
546  unsigned Align = MFI.getObjectAlignment(Index);
547  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
548 
549  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
550 
551  if (!isUInt<12>(Offset + Size - EltSize)) {
552  SOffset = AMDGPU::NoRegister;
553 
554  // We currently only support spilling VGPRs to EltSize boundaries, meaning
555  // we can simplify the adjustment of Offset here to just scale with
556  // WavefrontSize.
557  Offset *= ST.getWavefrontSize();
558 
559  // We don't have access to the register scavenger if this function is called
560  // during PEI::scavengeFrameVirtualRegs().
561  if (RS)
562  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
563 
564  if (SOffset == AMDGPU::NoRegister) {
 565       // There are no free SGPRs, and we are in the process of spilling VGPRs
 566       // too. Since we need a VGPR in order to spill SGPRs (this is true on
 567       // SI/CI, and on VI it remains true until spilling using scalar stores
 568       // is implemented), we have no way to free up an SGPR. Our solution here
 569       // is to add the offset directly to the ScratchOffset register, and then
 570       // subtract the offset after the spill to return ScratchOffset to its
 571       // original value.
572  SOffset = ScratchOffsetReg;
573  ScratchOffsetRegDelta = Offset;
574  } else {
575  Scavenged = true;
576  }
577 
578  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
579  .addReg(ScratchOffsetReg)
580  .addImm(Offset);
581 
582  Offset = 0;
583  }
584 
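// Emit one 4-byte buffer load/store per 32-bit subregister, stepping the
// immediate offset by EltSize on each iteration.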
585  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
586  unsigned SubReg = NumSubRegs == 1 ?
587  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
588 
589  unsigned SOffsetRegState = 0;
590  unsigned SrcDstRegState = getDefRegState(!IsStore);
591  if (i + 1 == e) {
592  SOffsetRegState |= getKillRegState(Scavenged);
593  // The last implicit use carries the "Kill" flag.
594  SrcDstRegState |= getKillRegState(IsKill);
595  }
596 
597  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
598  MachineMemOperand *NewMMO
599  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
600  EltSize, MinAlign(Align, EltSize * i));
601 
602  auto MIB = BuildMI(*MBB, MI, DL, Desc)
603  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
604  .addReg(ScratchRsrcReg)
605  .addReg(SOffset, SOffsetRegState)
606  .addImm(Offset)
607  .addImm(0) // glc
608  .addImm(0) // slc
609  .addImm(0) // tfe
610  .addMemOperand(NewMMO);
611 
612  if (NumSubRegs > 1)
613  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
614  }
615 
616  if (ScratchOffsetRegDelta != 0) {
617  // Subtract the offset we added to the ScratchOffset register.
618  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
619  .addReg(ScratchOffsetReg)
620  .addImm(ScratchOffsetRegDelta);
621  }
622 }
623 
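// Pick the widest scalar buffer load/store that evenly covers the spilled
// register; returns {element size in bytes, opcode}.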
624 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
625  bool Store) {
626  if (SuperRegSize % 16 == 0) {
627  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
628  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
629  }
630 
631  if (SuperRegSize % 8 == 0) {
632  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
633  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
634  }
635 
636  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
637  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
638 }
639 
 640 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 641                                int Index,
642  RegScavenger *RS,
643  bool OnlyToVGPR) const {
644  MachineBasicBlock *MBB = MI->getParent();
 645   MachineFunction *MF = MBB->getParent();
 646   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 647   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
 648 
 649   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
 650     = MFI->getSGPRToVGPRSpills(Index);
651  bool SpillToVGPR = !VGPRSpills.empty();
652  if (OnlyToVGPR && !SpillToVGPR)
653  return false;
654 
656  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
657  const SIInstrInfo *TII = ST.getInstrInfo();
658 
659  unsigned SuperReg = MI->getOperand(0).getReg();
660  bool IsKill = MI->getOperand(0).isKill();
661  const DebugLoc &DL = MI->getDebugLoc();
662 
663  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
664 
665  bool SpillToSMEM = spillSGPRToSMEM();
666  if (SpillToSMEM && OnlyToVGPR)
667  return false;
668 
669  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
670  SuperReg != MFI->getFrameOffsetReg() &&
671  SuperReg != MFI->getScratchWaveOffsetReg()));
672 
673  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
674 
675  unsigned OffsetReg = AMDGPU::M0;
676  unsigned M0CopyReg = AMDGPU::NoRegister;
677 
678  if (SpillToSMEM) {
679  if (RS->isRegUsed(AMDGPU::M0)) {
680  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
681  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
682  .addReg(AMDGPU::M0);
683  }
684  }
685 
686  unsigned ScalarStoreOp;
687  unsigned EltSize = 4;
688  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
689  if (SpillToSMEM && isSGPRClass(RC)) {
690  // XXX - if private_element_size is larger than 4 it might be useful to be
691  // able to spill wider vmem spills.
692  std::tie(EltSize, ScalarStoreOp) =
693  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
694  }
695 
696  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
697  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
698 
699  // SubReg carries the "Kill" flag when SubReg == SuperReg.
700  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
701  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
702  unsigned SubReg = NumSubRegs == 1 ?
703  SuperReg : getSubReg(SuperReg, SplitParts[i]);
704 
705  if (SpillToSMEM) {
706  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
707 
708  // The allocated memory size is really the wavefront size * the frame
709  // index size. The widest register class is 64 bytes, so a 4-byte scratch
710  // allocation is enough to spill this in a single stack object.
711  //
712  // FIXME: Frame size/offsets are computed earlier than this, so the extra
713  // space is still unnecessarily allocated.
714 
715  unsigned Align = FrameInfo.getObjectAlignment(Index);
716  MachinePointerInfo PtrInfo
717  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 718       MachineMemOperand *MMO
 719         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
 720                                    EltSize, MinAlign(Align, EltSize * i));
721 
722  // SMEM instructions only support a single offset, so increment the wave
723  // offset.
724 
725  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
726  if (Offset != 0) {
727  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
728  .addReg(MFI->getFrameOffsetReg())
729  .addImm(Offset);
730  } else {
731  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
732  .addReg(MFI->getFrameOffsetReg());
733  }
734 
735  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
736  .addReg(SubReg, getKillRegState(IsKill)) // sdata
737  .addReg(MFI->getScratchRSrcReg()) // sbase
738  .addReg(OffsetReg, RegState::Kill) // soff
739  .addImm(0) // glc
740  .addMemOperand(MMO);
741 
742  continue;
743  }
744 
745  if (SpillToVGPR) {
746  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
747 
748  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
749  // only circumstance in which we say it is undefined is when it is the
750  // first spill to this VGPR in the first basic block.
751  bool VGPRDefined = true;
752  if (MBB == &MF->front())
753  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
754 
755  // Mark the "old value of vgpr" input undef only if this is the first sgpr
756  // spill to this specific vgpr in the first basic block.
757  BuildMI(*MBB, MI, DL,
758  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
759  Spill.VGPR)
760  .addReg(SubReg, getKillRegState(IsKill))
761  .addImm(Spill.Lane)
762  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
763 
764  // FIXME: Since this spills to another register instead of an actual
765  // frame index, we should delete the frame index when all references to
766  // it are fixed.
767  } else {
 768       // XXX - Can the spill to VGPR fail for some subregisters but not others?
769  if (OnlyToVGPR)
770  return false;
771 
772  // Spill SGPR to a frame index.
 773       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
 774       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 776 
 777       MachineInstrBuilder Mov
 778         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
779  .addReg(SubReg, SubKillState);
780 
781 
782  // There could be undef components of a spilled super register.
783  // TODO: Can we detect this and skip the spill?
784  if (NumSubRegs > 1) {
785  // The last implicit use of the SuperReg carries the "Kill" flag.
786  unsigned SuperKillState = 0;
787  if (i + 1 == e)
788  SuperKillState |= getKillRegState(IsKill);
789  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
790  }
791 
792  unsigned Align = FrameInfo.getObjectAlignment(Index);
793  MachinePointerInfo PtrInfo
794  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 795       MachineMemOperand *MMO
 796         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
 797                                    EltSize, MinAlign(Align, EltSize * i));
798  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
799  .addReg(TmpReg, RegState::Kill) // src
800  .addFrameIndex(Index) // vaddr
 801         .addReg(MFI->getScratchRSrcReg())  // srsrc
802  .addReg(MFI->getFrameOffsetReg()) // soffset
803  .addImm(i * 4) // offset
804  .addMemOperand(MMO);
805  }
806  }
807 
808  if (M0CopyReg != AMDGPU::NoRegister) {
809  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
810  .addReg(M0CopyReg, RegState::Kill);
811  }
812 
813  MI->eraseFromParent();
814  MFI->addToSpilledSGPRs(NumSubRegs);
815  return true;
816 }
817 
 818 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
 819                                  int Index,
820  RegScavenger *RS,
821  bool OnlyToVGPR) const {
 822   MachineFunction *MF = MI->getParent()->getParent();
 823   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 824   MachineBasicBlock *MBB = MI->getParent();
 825   MachineRegisterInfo &MRI = MF->getRegInfo();
 826 
 827   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
 828     = MFI->getSGPRToVGPRSpills(Index);
829  bool SpillToVGPR = !VGPRSpills.empty();
830  if (OnlyToVGPR && !SpillToVGPR)
831  return false;
832 
833  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
834  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
835  const SIInstrInfo *TII = ST.getInstrInfo();
836  const DebugLoc &DL = MI->getDebugLoc();
837 
838  unsigned SuperReg = MI->getOperand(0).getReg();
839  bool SpillToSMEM = spillSGPRToSMEM();
840  if (SpillToSMEM && OnlyToVGPR)
841  return false;
842 
843  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
844 
845  unsigned OffsetReg = AMDGPU::M0;
846  unsigned M0CopyReg = AMDGPU::NoRegister;
847 
848  if (SpillToSMEM) {
849  if (RS->isRegUsed(AMDGPU::M0)) {
850  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
851  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
852  .addReg(AMDGPU::M0);
853  }
854  }
855 
856  unsigned EltSize = 4;
857  unsigned ScalarLoadOp;
858 
859  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
860  if (SpillToSMEM && isSGPRClass(RC)) {
861  // XXX - if private_element_size is larger than 4 it might be useful to be
862  // able to spill wider vmem spills.
863  std::tie(EltSize, ScalarLoadOp) =
864  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
865  }
866 
867  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
868  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
869 
870  // SubReg carries the "Kill" flag when SubReg == SuperReg.
871  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
872 
873  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
874  unsigned SubReg = NumSubRegs == 1 ?
875  SuperReg : getSubReg(SuperReg, SplitParts[i]);
876 
877  if (SpillToSMEM) {
878  // FIXME: Size may be > 4 but extra bytes wasted.
879  unsigned Align = FrameInfo.getObjectAlignment(Index);
880  MachinePointerInfo PtrInfo
881  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 882       MachineMemOperand *MMO
 883         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
 884                                    EltSize, MinAlign(Align, EltSize * i));
885 
886  // Add i * 4 offset
887  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
888  if (Offset != 0) {
889  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
890  .addReg(MFI->getFrameOffsetReg())
891  .addImm(Offset);
892  } else {
893  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
894  .addReg(MFI->getFrameOffsetReg());
895  }
896 
897  auto MIB =
898  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
899  .addReg(MFI->getScratchRSrcReg()) // sbase
900  .addReg(OffsetReg, RegState::Kill) // soff
901  .addImm(0) // glc
902  .addMemOperand(MMO);
903 
904  if (NumSubRegs > 1)
905  MIB.addReg(SuperReg, RegState::ImplicitDefine);
906 
907  continue;
908  }
909 
910  if (SpillToVGPR) {
911  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
912  auto MIB =
913  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
914  SubReg)
915  .addReg(Spill.VGPR)
916  .addImm(Spill.Lane);
917 
918  if (NumSubRegs > 1)
919  MIB.addReg(SuperReg, RegState::ImplicitDefine);
920  } else {
921  if (OnlyToVGPR)
922  return false;
923 
924  // Restore SGPR from a stack slot.
925  // FIXME: We should use S_LOAD_DWORD here for VI.
926  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
927  unsigned Align = FrameInfo.getObjectAlignment(Index);
928 
929  MachinePointerInfo PtrInfo
930  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
931 
932  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
933  MachineMemOperand::MOLoad, EltSize,
934  MinAlign(Align, EltSize * i));
935 
936  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
937  .addFrameIndex(Index) // vaddr
938  .addReg(MFI->getScratchRSrcReg()) // srsrc
939  .addReg(MFI->getFrameOffsetReg()) // soffset
940  .addImm(i * 4) // offset
941  .addMemOperand(MMO);
942 
943  auto MIB =
944  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
945  .addReg(TmpReg, RegState::Kill);
946 
947  if (NumSubRegs > 1)
948  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
949  }
950  }
951 
952  if (M0CopyReg != AMDGPU::NoRegister) {
953  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
954  .addReg(M0CopyReg, RegState::Kill);
955  }
956 
957  MI->eraseFromParent();
958  return true;
959 }
960 
961 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
962 /// a VGPR and the stack slot can be safely eliminated when all other users are
963 /// handled.
 964 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
 965   MachineBasicBlock::iterator MI,
 966   int FI,
967  RegScavenger *RS) const {
968  switch (MI->getOpcode()) {
969  case AMDGPU::SI_SPILL_S512_SAVE:
970  case AMDGPU::SI_SPILL_S256_SAVE:
971  case AMDGPU::SI_SPILL_S128_SAVE:
972  case AMDGPU::SI_SPILL_S64_SAVE:
973  case AMDGPU::SI_SPILL_S32_SAVE:
974  return spillSGPR(MI, FI, RS, true);
975  case AMDGPU::SI_SPILL_S512_RESTORE:
976  case AMDGPU::SI_SPILL_S256_RESTORE:
977  case AMDGPU::SI_SPILL_S128_RESTORE:
978  case AMDGPU::SI_SPILL_S64_RESTORE:
979  case AMDGPU::SI_SPILL_S32_RESTORE:
980  return restoreSGPR(MI, FI, RS, true);
981  default:
982  llvm_unreachable("not an SGPR spill instruction");
983  }
984 }
985 
 986 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 987                                         int SPAdj, unsigned FIOperandNum,
988  RegScavenger *RS) const {
 989   MachineFunction *MF = MI->getParent()->getParent();
 990   MachineRegisterInfo &MRI = MF->getRegInfo();
 991   MachineBasicBlock *MBB = MI->getParent();
 992   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 993   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
994  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
995  const SIInstrInfo *TII = ST.getInstrInfo();
996  DebugLoc DL = MI->getDebugLoc();
997 
998  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
999  int Index = MI->getOperand(FIOperandNum).getIndex();
1000 
1001  switch (MI->getOpcode()) {
1002  // SGPR register spill
1003  case AMDGPU::SI_SPILL_S512_SAVE:
1004  case AMDGPU::SI_SPILL_S256_SAVE:
1005  case AMDGPU::SI_SPILL_S128_SAVE:
1006  case AMDGPU::SI_SPILL_S64_SAVE:
1007  case AMDGPU::SI_SPILL_S32_SAVE: {
1008  spillSGPR(MI, Index, RS);
1009  break;
1010  }
1011 
1012  // SGPR register restore
1013  case AMDGPU::SI_SPILL_S512_RESTORE:
1014  case AMDGPU::SI_SPILL_S256_RESTORE:
1015  case AMDGPU::SI_SPILL_S128_RESTORE:
1016  case AMDGPU::SI_SPILL_S64_RESTORE:
1017  case AMDGPU::SI_SPILL_S32_RESTORE: {
1018  restoreSGPR(MI, Index, RS);
1019  break;
1020  }
1021 
1022  // VGPR register spill
1023  case AMDGPU::SI_SPILL_V512_SAVE:
1024  case AMDGPU::SI_SPILL_V256_SAVE:
1025  case AMDGPU::SI_SPILL_V128_SAVE:
1026  case AMDGPU::SI_SPILL_V96_SAVE:
1027  case AMDGPU::SI_SPILL_V64_SAVE:
1028  case AMDGPU::SI_SPILL_V32_SAVE: {
1029  const MachineOperand *VData = TII->getNamedOperand(*MI,
1030  AMDGPU::OpName::vdata);
1031  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1032  Index,
1033  VData->getReg(), VData->isKill(),
1034  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1035  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1036  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1037  *MI->memoperands_begin(),
1038  RS);
1039  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1040  MI->eraseFromParent();
1041  break;
1042  }
1043  case AMDGPU::SI_SPILL_V32_RESTORE:
1044  case AMDGPU::SI_SPILL_V64_RESTORE:
1045  case AMDGPU::SI_SPILL_V96_RESTORE:
1046  case AMDGPU::SI_SPILL_V128_RESTORE:
1047  case AMDGPU::SI_SPILL_V256_RESTORE:
1048  case AMDGPU::SI_SPILL_V512_RESTORE: {
1049  const MachineOperand *VData = TII->getNamedOperand(*MI,
1050  AMDGPU::OpName::vdata);
1051 
1052  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1053  Index,
1054  VData->getReg(), VData->isKill(),
1055  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1056  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1057  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1058  *MI->memoperands_begin(),
1059  RS);
1060  MI->eraseFromParent();
1061  break;
1062  }
1063 
1064  default: {
1065  const DebugLoc &DL = MI->getDebugLoc();
1066  bool IsMUBUF = TII->isMUBUF(*MI);
1067 
1068  if (!IsMUBUF &&
1069  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1070  // Convert to an absolute stack address by finding the offset from the
1071  // scratch wave base and scaling by the wave size.
1072  //
1073  // In an entry function/kernel the stack address is already the
1074  // absolute address relative to the scratch wave offset.
1075 
1076  unsigned DiffReg
1077  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1078 
1079  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1080  unsigned ResultReg = IsCopy ?
1081  MI->getOperand(0).getReg() :
1082  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1083 
1084  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1085  .addReg(MFI->getFrameOffsetReg())
1086  .addReg(MFI->getScratchWaveOffsetReg());
1087 
1088  int64_t Offset = FrameInfo.getObjectOffset(Index);
1089  if (Offset == 0) {
1090  // XXX - This never happens because of emergency scavenging slot at 0?
1091  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1092  .addImm(Log2_32(ST.getWavefrontSize()))
1093  .addReg(DiffReg);
1094  } else {
1095  unsigned ScaledReg
1096  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1097 
1098  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1099  .addImm(Log2_32(ST.getWavefrontSize()))
1100  .addReg(DiffReg, RegState::Kill);
1101 
 1102         // TODO: Fold if use instruction is another add of a constant.
 1103         if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
 1104           TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1105  .addImm(Offset)
1106  .addReg(ScaledReg, RegState::Kill);
1107  } else {
1108  unsigned ConstOffsetReg
1109  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1110 
1111  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1112  .addImm(Offset);
1113  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1114  .addReg(ConstOffsetReg, RegState::Kill)
1115  .addReg(ScaledReg, RegState::Kill);
1116  }
1117  }
1118 
1119  // Don't introduce an extra copy if we're just materializing in a mov.
1120  if (IsCopy)
1121  MI->eraseFromParent();
1122  else
1123  FIOp.ChangeToRegister(ResultReg, false, false, true);
1124  return;
1125  }
1126 
1127  if (IsMUBUF) {
1128  // Disable offen so we don't need a 0 vgpr base.
1129  assert(static_cast<int>(FIOperandNum) ==
1130  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1131  AMDGPU::OpName::vaddr));
1132 
1133  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1134  == MFI->getFrameOffsetReg());
1135 
1136  int64_t Offset = FrameInfo.getObjectOffset(Index);
1137  int64_t OldImm
1138  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1139  int64_t NewOffset = OldImm + Offset;
1140 
1141  if (isUInt<12>(NewOffset) &&
1142  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1143  MI->eraseFromParent();
1144  return;
1145  }
1146  }
1147 
1148  // If the offset is simply too big, don't convert to a scratch wave offset
1149  // relative index.
1150 
1151  int64_t Offset = FrameInfo.getObjectOffset(Index);
1152  FIOp.ChangeToImmediate(Offset);
1153  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1154  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1155  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1156  .addImm(Offset);
1157  FIOp.ChangeToRegister(TmpReg, false, false, true);
1158  }
1159  }
1160  }
1161 }
1162 
 1163 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
 1164   #define AMDGPU_REG_ASM_NAMES
1165  #include "AMDGPURegAsmNames.inc.cpp"
1166 
1167  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1168  if (Reg >= BeginReg && Reg <= EndReg) { \
1169  unsigned Index = Reg - BeginReg; \
1170  assert(Index < array_lengthof(RegTable)); \
1171  return RegTable[Index]; \
1172  }
1173 
1174  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1175  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1176  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1177  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1178  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1179  VGPR96RegNames);
1180 
1181  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1182  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1183  VGPR128RegNames);
1184  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1185  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1186  SGPR128RegNames);
1187 
1188  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1189  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1190  VGPR256RegNames);
1191 
1192  REG_RANGE(
1193  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1194  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1195  VGPR512RegNames);
1196 
1197  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1198  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1199  SGPR256RegNames);
1200 
1201  REG_RANGE(
1202  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1203  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1204  SGPR512RegNames
1205  );
1206 
1207 #undef REG_RANGE
1208 
1209  // FIXME: Rename flat_scr so we don't need to special case this.
1210  switch (Reg) {
1211  case AMDGPU::FLAT_SCR:
1212  return "flat_scratch";
1213  case AMDGPU::FLAT_SCR_LO:
1214  return "flat_scratch_lo";
1215  case AMDGPU::FLAT_SCR_HI:
1216  return "flat_scratch_hi";
1217  default:
 1218     // For the special named registers the default is fine.
 1219     return AMDGPURegisterInfo::getRegAsmName(Reg);
 1220   }
1221 }
1222 
1223 // FIXME: This is very slow. It might be worth creating a map from physreg to
1224 // register class.
 1225 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
 1226   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
 1227 
1228  static const TargetRegisterClass *const BaseClasses[] = {
1229  &AMDGPU::VGPR_32RegClass,
1230  &AMDGPU::SReg_32RegClass,
1231  &AMDGPU::VReg_64RegClass,
1232  &AMDGPU::SReg_64RegClass,
1233  &AMDGPU::VReg_96RegClass,
1234  &AMDGPU::VReg_128RegClass,
1235  &AMDGPU::SReg_128RegClass,
1236  &AMDGPU::VReg_256RegClass,
1237  &AMDGPU::SReg_256RegClass,
1238  &AMDGPU::VReg_512RegClass,
1239  &AMDGPU::SReg_512RegClass,
1240  &AMDGPU::SCC_CLASSRegClass,
1241  &AMDGPU::Pseudo_SReg_32RegClass,
1242  &AMDGPU::Pseudo_SReg_128RegClass,
1243  };
1244 
1245  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1246  if (BaseClass->contains(Reg)) {
1247  return BaseClass;
1248  }
1249  }
1250  return nullptr;
1251 }
1252 
1253 // TODO: It might be helpful to have some target specific flags in
1254 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
 1255 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
 1256   unsigned Size = getRegSizeInBits(*RC);
1257  if (Size < 32)
1258  return false;
1259  switch (Size) {
1260  case 32:
1261  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1262  case 64:
1263  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1264  case 96:
1265  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1266  case 128:
1267  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1268  case 256:
1269  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1270  case 512:
1271  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1272  default:
1273  llvm_unreachable("Invalid register class size");
1274  }
1275 }
1276 
 1277 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
 1278   const TargetRegisterClass *SRC) const {
1279  switch (getRegSizeInBits(*SRC)) {
1280  case 32:
1281  return &AMDGPU::VGPR_32RegClass;
1282  case 64:
1283  return &AMDGPU::VReg_64RegClass;
1284  case 96:
1285  return &AMDGPU::VReg_96RegClass;
1286  case 128:
1287  return &AMDGPU::VReg_128RegClass;
1288  case 256:
1289  return &AMDGPU::VReg_256RegClass;
1290  case 512:
1291  return &AMDGPU::VReg_512RegClass;
1292  default:
1293  llvm_unreachable("Invalid register class size");
1294  }
1295 }
1296 
 1297 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
 1298   const TargetRegisterClass *VRC) const {
1299  switch (getRegSizeInBits(*VRC)) {
1300  case 32:
1301  return &AMDGPU::SGPR_32RegClass;
1302  case 64:
1303  return &AMDGPU::SReg_64RegClass;
1304  case 128:
1305  return &AMDGPU::SReg_128RegClass;
1306  case 256:
1307  return &AMDGPU::SReg_256RegClass;
1308  case 512:
1309  return &AMDGPU::SReg_512RegClass;
1310  default:
1311  llvm_unreachable("Invalid register class size");
1312  }
1313 }
1314 
 1315 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
 1316   const TargetRegisterClass *RC, unsigned SubIdx) const {
1317  if (SubIdx == AMDGPU::NoSubRegister)
1318  return RC;
1319 
1320  // We can assume that each lane corresponds to one 32-bit register.
1321  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1322  if (isSGPRClass(RC)) {
1323  switch (Count) {
1324  case 1:
1325  return &AMDGPU::SGPR_32RegClass;
1326  case 2:
1327  return &AMDGPU::SReg_64RegClass;
1328  case 4:
1329  return &AMDGPU::SReg_128RegClass;
1330  case 8:
1331  return &AMDGPU::SReg_256RegClass;
1332  case 16: /* fall-through */
1333  default:
1334  llvm_unreachable("Invalid sub-register class size");
1335  }
1336  } else {
1337  switch (Count) {
1338  case 1:
1339  return &AMDGPU::VGPR_32RegClass;
1340  case 2:
1341  return &AMDGPU::VReg_64RegClass;
1342  case 3:
1343  return &AMDGPU::VReg_96RegClass;
1344  case 4:
1345  return &AMDGPU::VReg_128RegClass;
1346  case 8:
1347  return &AMDGPU::VReg_256RegClass;
1348  case 16: /* fall-through */
1349  default:
1350  llvm_unreachable("Invalid sub-register class size");
1351  }
1352  }
1353 }
1354 
 1355 bool SIRegisterInfo::shouldRewriteCopySrc(
 1356   const TargetRegisterClass *DefRC,
1357  unsigned DefSubReg,
1358  const TargetRegisterClass *SrcRC,
1359  unsigned SrcSubReg) const {
1360  // We want to prefer the smallest register class possible, so we don't want to
1361  // stop and rewrite on anything that looks like a subregister
1362  // extract. Operations mostly don't care about the super register class, so we
1363  // only want to stop on the most basic of copies between the same register
1364  // class.
1365  //
1366  // e.g. if we have something like
1367  // %0 = ...
1368  // %1 = ...
1369  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1370  // %3 = COPY %2, sub0
1371  //
1372  // We want to look through the COPY to find:
1373  // => %3 = COPY %0
1374 
1375  // Plain copy.
1376  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1377 }
1378 
1379 /// Returns a register that is not used at any point in the function.
1380 /// If all registers are used, then this function will return
 1381 /// AMDGPU::NoRegister.
 1382 unsigned
 1383 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
 1384                                    const TargetRegisterClass *RC,
1385  const MachineFunction &MF) const {
1386 
1387  for (unsigned Reg : *RC)
1388  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1389  return Reg;
1390  return AMDGPU::NoRegister;
1391 }
1392 
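// Return the subregister indices used to split a register of class RC into
// EltSize-byte spill elements; empty if the register is a single element.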
 1393 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
 1394                                                    unsigned EltSize) const {
1395  if (EltSize == 4) {
1396  static const int16_t Sub0_15[] = {
1397  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1398  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1399  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1400  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1401  };
1402 
1403  static const int16_t Sub0_7[] = {
1404  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1405  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1406  };
1407 
1408  static const int16_t Sub0_3[] = {
1409  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1410  };
1411 
1412  static const int16_t Sub0_2[] = {
1413  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1414  };
1415 
1416  static const int16_t Sub0_1[] = {
1417  AMDGPU::sub0, AMDGPU::sub1,
1418  };
1419 
1420  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1421  case 32:
1422  return {};
1423  case 64:
1424  return makeArrayRef(Sub0_1);
1425  case 96:
1426  return makeArrayRef(Sub0_2);
1427  case 128:
1428  return makeArrayRef(Sub0_3);
1429  case 256:
1430  return makeArrayRef(Sub0_7);
1431  case 512:
1432  return makeArrayRef(Sub0_15);
1433  default:
1434  llvm_unreachable("unhandled register size");
1435  }
1436  }
1437 
1438  if (EltSize == 8) {
1439  static const int16_t Sub0_15_64[] = {
1440  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1441  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1442  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1443  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1444  };
1445 
1446  static const int16_t Sub0_7_64[] = {
1447  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1448  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1449  };
1450 
1451 
1452  static const int16_t Sub0_3_64[] = {
1453  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1454  };
1455 
1456  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1457  case 64:
1458  return {};
1459  case 128:
1460  return makeArrayRef(Sub0_3_64);
1461  case 256:
1462  return makeArrayRef(Sub0_7_64);
1463  case 512:
1464  return makeArrayRef(Sub0_15_64);
1465  default:
1466  llvm_unreachable("unhandled register size");
1467  }
1468  }
1469 
1470  assert(EltSize == 16 && "unhandled register spill split size");
1471 
1472  static const int16_t Sub0_15_128[] = {
1473  AMDGPU::sub0_sub1_sub2_sub3,
1474  AMDGPU::sub4_sub5_sub6_sub7,
1475  AMDGPU::sub8_sub9_sub10_sub11,
1476  AMDGPU::sub12_sub13_sub14_sub15
1477  };
1478 
1479  static const int16_t Sub0_7_128[] = {
1480  AMDGPU::sub0_sub1_sub2_sub3,
1481  AMDGPU::sub4_sub5_sub6_sub7
1482  };
1483 
1484  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1485  case 128:
1486  return {};
1487  case 256:
1488  return makeArrayRef(Sub0_7_128);
1489  case 512:
1490  return makeArrayRef(Sub0_15_128);
1491  default:
1492  llvm_unreachable("unhandled register size");
1493  }
1494 }
1495 
1496 const TargetRegisterClass*
 1497 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
 1498                                   unsigned Reg) const {
 1499   if (TargetRegisterInfo::isVirtualRegister(Reg))
 1500     return MRI.getRegClass(Reg);
1501 
1502  return getPhysRegClass(Reg);
1503 }
1504 
 1505 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
 1506                             unsigned Reg) const {
1507  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1508  assert(RC && "Register class for the reg not found");
1509  return hasVGPRs(RC);
1510 }
1511 
 1512 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
 1513                                     const TargetRegisterClass *SrcRC,
1514  unsigned SubReg,
1515  const TargetRegisterClass *DstRC,
1516  unsigned DstSubReg,
1517  const TargetRegisterClass *NewRC,
1518  LiveIntervals &LIS) const {
1519  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1520  unsigned DstSize = getRegSizeInBits(*DstRC);
1521  unsigned NewSize = getRegSizeInBits(*NewRC);
1522 
 1523   // Do not increase the size of registers beyond a dword; we would need to
 1524   // allocate adjacent registers and constrain regalloc more than needed.
1525 
1526  // Always allow dword coalescing.
1527  if (SrcSize <= 32 || DstSize <= 32)
1528  return true;
1529 
1530  return NewSize <= DstSize || NewSize <= SrcSize;
1531 }
1532 
 1533 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 1534                                              MachineFunction &MF) const {
1535 
1536  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 1537   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 1538 
1539  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1540  MF.getFunction());
1541  switch (RC->getID()) {
1542  default:
1543  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1544  case AMDGPU::VGPR_32RegClassID:
1545  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1546  case AMDGPU::SGPR_32RegClassID:
1547  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1548  }
1549 }
1550 
 1551 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
 1552                                                 unsigned Idx) const {
1553  if (Idx == getVGPRPressureSet())
1554  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1555  const_cast<MachineFunction &>(MF));
1556 
1557  if (Idx == getSGPRPressureSet())
1558  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1559  const_cast<MachineFunction &>(MF));
1560 
1561  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1562 }
1563 
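// M0 is excluded from pressure tracking: its register unit reports no
// pressure sets.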
1564 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1565  static const int Empty[] = { -1 };
1566 
1567  if (hasRegUnit(AMDGPU::M0, RegUnit))
1568  return Empty;
1569  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1570 }
1571 
 1572 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
 1573   // Not a callee saved register.
1574  return AMDGPU::SGPR30_SGPR31;
1575 }
1576 
1577 const TargetRegisterClass *
 1578 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
 1579                                                  const MachineRegisterInfo &MRI) const {
1580  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1581  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1582  if (!RB)
1583  return nullptr;
1584 
1585  switch (Size) {
1586  case 32:
1587  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1588  &AMDGPU::SReg_32_XM0RegClass;
1589  case 64:
1590  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1591  &AMDGPU::SReg_64_XEXECRegClass;
1592  case 96:
1593  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1594  nullptr;
1595  case 128:
1596  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1597  &AMDGPU::SReg_128RegClass;
1598  default:
1599  llvm_unreachable("not implemented");
1600  }
1601 }
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const override
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:398
Interface definition for SIRegisterInfo.
static unsigned getSubRegFromChannel(unsigned Channel)
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
AMDGPU specific subclass of TargetSubtarget.
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override
bool hasScalarStores() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
bool isRegUsed(unsigned Reg, bool includeReserved=true) const
Return if a specific register is currently used.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
SIRegisterInfo(const GCNSubtarget &ST)
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override
static int getOffsetMUBUFLoad(unsigned Opc)
StringRef getRegAsmName(unsigned Reg) const override
bool hasStackObjects() const
Return true if there are any stack objects in this function.
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static std::pair< unsigned, unsigned > getSpillEltSize(unsigned SuperRegSize, bool Store)
unsigned Reg
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
Definition: MachineInstr.h:830
const SIInstrInfo * getInstrInfo() const override
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount)
A debug info location.
Definition: DebugLoc.h:34
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override
static unsigned getNumSubRegsForSpillOp(unsigned Op)
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:718
A description of a memory reference used in the backend.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
unsigned SubReg
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
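A hedged sketch of how such a memory operand is commonly built for a stack slot and attached to an instruction; the 4-byte size and alignment are illustrative assumptions and the helper name is hypothetical:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineMemOperand.h"
  // Hypothetical helper: attach a 4-byte store memory operand for frame index FI.
  static void addStackStoreMemOperand(llvm::MachineFunction &MF,
                                      llvm::MachineInstrBuilder &MIB, int FI) {
    llvm::MachineMemOperand *MMO = MF.getMachineMemOperand(
        llvm::MachinePointerInfo::getFixedStack(MF, FI),
        llvm::MachineMemOperand::MOStore, /*s=*/4, /*base_alignment=*/4);
    MIB.addMemOperand(MMO);
  }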
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:409
unsigned FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
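A hedged usage sketch (the helper and its use of the offset operand are illustrative, not copied from this file):

  // Assumes the AMDGPU backend headers (SIInstrInfo.h) are available.
  static int64_t readOffsetOperand(const llvm::SIInstrInfo *TII,
                                   llvm::MachineInstr &MI) {
    // Look up the operand named 'offset'; nullptr if MI has no such operand.
    llvm::MachineOperand *OffsetOp =
        TII->getNamedOperand(MI, llvm::AMDGPU::OpName::offset);
    return OffsetOp ? OffsetOp->getImm() : 0;
  }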
bool isSGPRPressureSet(unsigned SetID) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
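A hedged sketch of the intended query (register class and element size are illustrative; the assumption is that the result lists the covering sub-register indices, e.g. sub0..sub3 when a 128-bit class is split into 32-bit parts):

  // Assumes TRI (const SIRegisterInfo *) is available in context.
  llvm::ArrayRef<int16_t> Parts =
      TRI->getRegSplitParts(&llvm::AMDGPU::SReg_128RegClass, /*EltSize=*/4);
  for (int16_t SubIdx : Parts) {
    // Under the stated assumption, SubIdx walks sub0, sub1, sub2, sub3.
    (void)SubIdx;
  }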
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if the SGPR spill can be placed entirely in VGPR lanes.
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
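A hedged sketch of the frame-index rewriting pattern these two mutators enable (the surrounding variables are assumed context, not this file's exact logic):

  // Assumes FIOp (MachineOperand &), FrameReg (unsigned), and Offset (int64_t).
  if (Offset == 0)
    FIOp.ChangeToRegister(FrameReg, /*isDef=*/false);  // frame index folds to the base register
  else
    FIOp.ChangeToImmediate(Offset);                    // or to a fully resolved constant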
#define REG_RANGE(BeginReg, EndReg, RegTable)
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
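A hedged numeric sketch (values chosen for illustration): the result is the largest power of two dividing both arguments, i.e. the alignment that still holds after offsetting an aligned base.

  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  void minAlignExamples() {
    assert(llvm::MinAlign(16, 8) == 8);  // a 16-aligned base plus an 8-byte offset stays 8-aligned
    assert(llvm::MinAlign(8, 12) == 4);  // 12 contributes only a factor of 4
  }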
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
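A hedged sketch tying the builder entries together (opcodes, registers, and flags here are illustrative choices, not taken from this file): BuildMI creates the instruction at the insertion point, and the fluent add* calls append operands.

  // Assumes MBB (MachineBasicBlock *), InsertPt (iterator), DL (DebugLoc),
  // TII (const SIInstrInfo *), TmpReg/DstReg/SrcReg (unsigned), IsKill (bool).
  llvm::BuildMI(*MBB, InsertPt, DL, TII->get(llvm::AMDGPU::V_MOV_B32_e32), TmpReg)
      .addImm(0);                                       // materialize an immediate
  llvm::BuildMI(*MBB, InsertPt, DL, TII->get(llvm::AMDGPU::COPY), DstReg)
      .addReg(SrcReg, llvm::getKillRegState(IsKill));   // copy, marking the source killed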
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
unsigned getReturnAddressReg(const MachineFunction &MF) const
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
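As a usage note (the invocation is illustrative, not from this page): being a cl::opt, the switch can be flipped on the llc command line, e.g. llc -march=amdgcn -mcpu=gfx900 -amdgpu-spill-sgpr-to-smem <input.ll>, and per its description it only takes effect on subtargets that support scalar stores.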
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getSGPRPressureSet() const
const RegisterBank * getRegBankOrNull(unsigned Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
const MachineBasicBlock & front() const
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
This class implements the register bank concept.
Definition: RegisterBank.h:29
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
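A hedged numeric sketch (values chosen for illustration); note that the documented -1 for zero comes back as an all-ones unsigned value:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  void log2Examples() {
    assert(llvm::Log2_32(32) == 5u);  // exact power of two
    assert(llvm::Log2_32(33) == 5u);  // floor of the log
    assert(llvm::Log2_32(0) == ~0u);  // the "-1" case, wrapped to unsigned
  }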
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:399
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool hasInv2PiInlineImm() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
void reserveRegisterTuples(BitVector &, unsigned Reg) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
uint32_t Size
Definition: Profile.cpp:47
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
const unsigned Kind
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
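A hedged illustration, assuming the usual GCN inline-constant set (small integers in [-16, 64] plus a few floating-point bit patterns, and 1/(2*pi) when HasInv2Pi is set):

  // Assumes ST (const GCNSubtarget &) is available in context.
  bool Yes = llvm::AMDGPU::isInlinableLiteral32(64, ST.hasInv2PiInlineImm());  // 64 encodes as an inline constant
  bool No  = llvm::AMDGPU::isInlinableLiteral32(65, ST.hasInv2PiInlineImm());  // 65 would need a literal operand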
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
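A hedged usage sketch (the register class is an illustrative choice):

  // Assumes MRI (MachineRegisterInfo &) is available in context.
  unsigned TmpVGPR = MRI.createVirtualRegister(&llvm::AMDGPU::VGPR_32RegClass);
  (void)TmpVGPR;  // a fresh virtual register constrained to VGPR_32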
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:48
bool hasCalls() const
Return true if the current function has any function calls.