SIRegisterInfo.cpp (LLVM 9.0.0svn)
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "llvm/CodeGen/LiveIntervals.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/RegisterScavenging.h"
25 #include "llvm/CodeGen/SlotIndexes.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/LLVMContext.h"
28 
29 using namespace llvm;
30 
31 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
32  for (unsigned i = 0; PSets[i] != -1; ++i) {
33  if (PSets[i] == (int)PSetID)
34  return true;
35  }
36  return false;
37 }
38 
39 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
40  BitVector &PressureSets) const {
41  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
42  const int *PSets = getRegUnitPressureSets(*U);
43  if (hasPressureSet(PSets, PSetID)) {
44  PressureSets.set(PSetID);
45  break;
46  }
47  }
48 }
49 
50 static cl::opt<bool> EnableSpillSGPRToSMEM(
51   "amdgpu-spill-sgpr-to-smem",
52   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
53   cl::init(false));
54 
55 static cl::opt<bool> EnableSpillSGPRToVGPR(
56   "amdgpu-spill-sgpr-to-vgpr",
57   cl::desc("Enable spilling SGPRs to VGPRs"),
58   cl::ReallyHidden,
59   cl::init(true));
60 
61 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
62   AMDGPURegisterInfo(),
63   SGPRPressureSets(getNumRegPressureSets()),
64  VGPRPressureSets(getNumRegPressureSets()),
65  SpillSGPRToVGPR(false),
66  SpillSGPRToSMEM(false) {
67  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
68  SpillSGPRToSMEM = true;
69  else if (EnableSpillSGPRToVGPR)
70  SpillSGPRToVGPR = true;
71 
72  unsigned NumRegPressureSets = getNumRegPressureSets();
73 
74  SGPRSetID = NumRegPressureSets;
75  VGPRSetID = NumRegPressureSets;
76 
77  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
78  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
79  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
80  }
81 
82  // Determine the number of reg units for each pressure set.
83  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
84  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
85  const int *PSets = getRegUnitPressureSets(i);
86  for (unsigned j = 0; PSets[j] != -1; ++j) {
87  ++PressureSetRegUnits[PSets[j]];
88  }
89  }
90 
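  // The SGPR/VGPR pressure set IDs are chosen as the sets covering the most
  // register units; getRegPressureSetLimit() keys off these IDs later.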
91  unsigned VGPRMax = 0, SGPRMax = 0;
92  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
93  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
94  VGPRSetID = i;
95  VGPRMax = PressureSetRegUnits[i];
96  continue;
97  }
98  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
99  SGPRSetID = i;
100  SGPRMax = PressureSetRegUnits[i];
101  }
102  }
103 
104  assert(SGPRSetID < NumRegPressureSets &&
105  VGPRSetID < NumRegPressureSets);
106 }
107 
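// Reserve the scratch buffer resource descriptor at the top of the SGPR file:
// take the last 4-aligned group of 4 SGPRs and return the covering SReg_128 tuple.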
108 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
109   const MachineFunction &MF) const {
110 
111  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
112  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
113  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
114  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
115 }
116 
117 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
118  unsigned Reg;
119 
120  // Try to place it in a hole after PrivateSegmentBufferReg.
121  if (RegCount & 3) {
122  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
123  // alignment constraints, so we have a hole where we can put the wave offset.
124  Reg = RegCount - 1;
125  } else {
126  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
127  // wave offset before it.
128  Reg = RegCount - 5;
129  }
130 
131  return Reg;
132 }
133 
134 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
135   const MachineFunction &MF) const {
136   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
137   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
138   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
139 }
140 
141 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
142   const MachineFunction &MF) const {
143  return AMDGPU::SGPR32;
144 }
145 
146 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
147   BitVector Reserved(getNumRegs());
148 
149  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
150  // this seems likely to result in bugs, so I'm marking them as reserved.
151  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
152  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
153 
154  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
155  reserveRegisterTuples(Reserved, AMDGPU::M0);
156 
157  // Reserve the memory aperture registers.
158  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
160  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
161  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
162 
163  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
164  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
165 
166  // Reserve xnack_mask registers - support is not implemented in Codegen.
167  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
168 
169  // Reserve lds_direct register - support is not implemented in Codegen.
170  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
171 
172  // Reserve Trap Handler registers - support is not implemented in Codegen.
173  reserveRegisterTuples(Reserved, AMDGPU::TBA);
174  reserveRegisterTuples(Reserved, AMDGPU::TMA);
175  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
176  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
177  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
178  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
179  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
180  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
181  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
182  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
183 
184  // Reserve null register - it shall never be allocated
185  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
186 
187  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
188 
189  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
190  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
191  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
192  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
193  reserveRegisterTuples(Reserved, Reg);
194  }
195 
196  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
197  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
198  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
199  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
200  reserveRegisterTuples(Reserved, Reg);
201  }
202 
203   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
204 
205  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
206  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
207  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
208  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
209  }
210 
211  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
212  if (ScratchRSrcReg != AMDGPU::NoRegister) {
213  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
214  // to spill.
215  // TODO: May need to reserve a VGPR if doing LDS spilling.
216  reserveRegisterTuples(Reserved, ScratchRSrcReg);
217  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
218  }
219 
220  // We have to assume the SP is needed in case there are calls in the function,
221  // which is detected after the function is lowered. If we aren't really going
222  // to need SP, don't bother reserving it.
223  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
224 
225  if (StackPtrReg != AMDGPU::NoRegister) {
226  reserveRegisterTuples(Reserved, StackPtrReg);
227  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
228  }
229 
230  unsigned FrameReg = MFI->getFrameOffsetReg();
231  if (FrameReg != AMDGPU::NoRegister) {
232  reserveRegisterTuples(Reserved, FrameReg);
233  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
234  }
235 
236  for (unsigned Reg : MFI->WWMReservedRegs) {
237  reserveRegisterTuples(Reserved, Reg);
238  }
239 
240  return Reserved;
241 }
242 
243 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
244   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
245   if (Info->isEntryFunction()) {
246  const MachineFrameInfo &MFI = Fn.getFrameInfo();
247  return MFI.hasStackObjects() || MFI.hasCalls();
248  }
249 
250  // May need scavenger for dealing with callee saved registers.
251  return true;
252 }
253 
254 bool SIRegisterInfo::requiresFrameIndexScavenging(
255   const MachineFunction &MF) const {
256  const MachineFrameInfo &MFI = MF.getFrameInfo();
257  if (MFI.hasStackObjects())
258  return true;
259 
260  // May need to deal with callee saved registers.
261   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
262   return !Info->isEntryFunction();
263 }
264 
265 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
266   const MachineFunction &MF) const {
267  const MachineFrameInfo &MFI = MF.getFrameInfo();
268  if (!MFI.hasStackObjects())
269  return false;
270 
271  // The scavenger is used for large frames which may require finding a free
272  // register for large offsets.
273  if (!isUInt<12>(MFI.getStackSize()))
274  return true;
275 
276  // If using scalar stores for spills, m0 is needed for the scalar store
277  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
278  // register for it during frame index elimination, so the scavenger is
279  // directly needed.
280  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
281  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
282 }
283 
284 bool SIRegisterInfo::requiresVirtualBaseRegisters(
285   const MachineFunction &) const {
286  // There are no special dedicated stack or frame pointers.
287  return true;
288 }
289 
290 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
291   // This helps catch bugs as verifier errors.
292  return true;
293 }
294 
295 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
296   assert(SIInstrInfo::isMUBUF(*MI));
297 
298  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
299  AMDGPU::OpName::offset);
300  return MI->getOperand(OffIdx).getImm();
301 }
302 
303 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
304                                                  int Idx) const {
305  if (!SIInstrInfo::isMUBUF(*MI))
306  return 0;
307 
308   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
309                                            AMDGPU::OpName::vaddr) &&
310  "Should never see frame index on non-address operand");
311 
312  return getMUBUFInstrOffset(MI);
313 }
314 
315 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
316   if (!MI->mayLoadOrStore())
317  return false;
318 
319  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
320 
321  return !isUInt<12>(FullOffset);
322 }
323 
324 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
325                                                   unsigned BaseReg,
326  int FrameIdx,
327  int64_t Offset) const {
328   MachineBasicBlock::iterator Ins = MBB->begin();
329   DebugLoc DL; // Defaults to "unknown"
330 
331  if (Ins != MBB->end())
332  DL = Ins->getDebugLoc();
333 
334  MachineFunction *MF = MBB->getParent();
335  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
336  const SIInstrInfo *TII = Subtarget.getInstrInfo();
337 
338  if (Offset == 0) {
339  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
340  .addFrameIndex(FrameIdx);
341  return;
342  }
343 
344   MachineRegisterInfo &MRI = MF->getRegInfo();
345   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
346 
347  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
348 
349  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
350  .addImm(Offset);
351  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
352  .addFrameIndex(FrameIdx);
353 
354  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
355  .addReg(OffsetReg, RegState::Kill)
356  .addReg(FIReg)
357  .addImm(0); // clamp bit
358 }
359 
360 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
361                                        int64_t Offset) const {
362 
363  MachineBasicBlock *MBB = MI.getParent();
364  MachineFunction *MF = MBB->getParent();
365  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
366  const SIInstrInfo *TII = Subtarget.getInstrInfo();
367 
368 #ifndef NDEBUG
369  // FIXME: Is it possible to be storing a frame index to itself?
370  bool SeenFI = false;
371  for (const MachineOperand &MO: MI.operands()) {
372  if (MO.isFI()) {
373  if (SeenFI)
374  llvm_unreachable("should not see multiple frame indices");
375 
376  SeenFI = true;
377  }
378  }
379 #endif
380 
381  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
382  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
383  assert(TII->isMUBUF(MI));
384  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
385  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
386  "should only be seeing frame offset relative FrameIndex");
387 
388 
389  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
390  int64_t NewOffset = OffsetOp->getImm() + Offset;
391  assert(isUInt<12>(NewOffset) && "offset should be legal");
392 
393  FIOp->ChangeToRegister(BaseReg, false);
394  OffsetOp->setImm(NewOffset);
395 }
396 
397 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
398                                         unsigned BaseReg,
399  int64_t Offset) const {
400  if (!SIInstrInfo::isMUBUF(*MI))
401  return false;
402 
403  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
404 
405  return isUInt<12>(NewOffset);
406 }
407 
408 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
409   const MachineFunction &MF, unsigned Kind) const {
410  // This is inaccurate. It depends on the instruction and address space. The
411  // only place where we should hit this is for dealing with frame indexes /
412  // private accesses, so this is correct in that case.
413  return &AMDGPU::VGPR_32RegClass;
414 }
415 
416 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
417 
418  switch (Op) {
419  case AMDGPU::SI_SPILL_S512_SAVE:
420  case AMDGPU::SI_SPILL_S512_RESTORE:
421  case AMDGPU::SI_SPILL_V512_SAVE:
422  case AMDGPU::SI_SPILL_V512_RESTORE:
423  return 16;
424  case AMDGPU::SI_SPILL_S256_SAVE:
425  case AMDGPU::SI_SPILL_S256_RESTORE:
426  case AMDGPU::SI_SPILL_V256_SAVE:
427  case AMDGPU::SI_SPILL_V256_RESTORE:
428  return 8;
429  case AMDGPU::SI_SPILL_S160_SAVE:
430  case AMDGPU::SI_SPILL_S160_RESTORE:
431  case AMDGPU::SI_SPILL_V160_SAVE:
432  case AMDGPU::SI_SPILL_V160_RESTORE:
433  return 5;
434  case AMDGPU::SI_SPILL_S128_SAVE:
435  case AMDGPU::SI_SPILL_S128_RESTORE:
436  case AMDGPU::SI_SPILL_V128_SAVE:
437  case AMDGPU::SI_SPILL_V128_RESTORE:
438  return 4;
439  case AMDGPU::SI_SPILL_S96_SAVE:
440  case AMDGPU::SI_SPILL_S96_RESTORE:
441  case AMDGPU::SI_SPILL_V96_SAVE:
442  case AMDGPU::SI_SPILL_V96_RESTORE:
443  return 3;
444  case AMDGPU::SI_SPILL_S64_SAVE:
445  case AMDGPU::SI_SPILL_S64_RESTORE:
446  case AMDGPU::SI_SPILL_V64_SAVE:
447  case AMDGPU::SI_SPILL_V64_RESTORE:
448  return 2;
449  case AMDGPU::SI_SPILL_S32_SAVE:
450  case AMDGPU::SI_SPILL_S32_RESTORE:
451  case AMDGPU::SI_SPILL_V32_SAVE:
452  case AMDGPU::SI_SPILL_V32_RESTORE:
453  return 1;
454  default: llvm_unreachable("Invalid spill opcode");
455  }
456 }
457 
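// Map an *_OFFEN MUBUF store opcode to its *_OFFSET form (immediate offset,
// no VGPR address), or -1 if no such form exists; getOffsetMUBUFLoad below
// does the same for loads.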
458 static int getOffsetMUBUFStore(unsigned Opc) {
459  switch (Opc) {
460  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
461  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
462  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
463  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
464  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
465  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
466  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
467  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
468  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
469  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
470  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
471  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
472  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
473  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
474  default:
475  return -1;
476  }
477 }
478 
479 static int getOffsetMUBUFLoad(unsigned Opc) {
480  switch (Opc) {
481  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
482  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
483  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
484  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
485  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
486  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
487  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
488  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
489  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
490  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
491  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
492  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
493  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
494  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
495  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
496  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
497  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
498  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
499  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
500  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
501  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
502  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
503  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
504  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
505  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
506  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
507  default:
508  return -1;
509  }
510 }
511 
512 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
513 // need to handle the case where an SGPR may need to be spilled while spilling.
514 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
515                                       MachineFrameInfo &MFI,
516                                       MachineBasicBlock::iterator MI,
517                                       int Index,
518  int64_t Offset) {
519  MachineBasicBlock *MBB = MI->getParent();
520  const DebugLoc &DL = MI->getDebugLoc();
521  bool IsStore = MI->mayStore();
522 
523  unsigned Opc = MI->getOpcode();
524   int LoadStoreOp = IsStore ?
525     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
526  if (LoadStoreOp == -1)
527  return false;
528 
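  // Rebuild the instruction in its _OFFSET form, reusing the original vdata,
  // resource and scalar offset operands; the folded immediate replaces the
  // VGPR address operand.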
529  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
530  MachineInstrBuilder NewMI =
531  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
532  .add(*Reg)
533  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
534  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
535  .addImm(Offset)
536  .addImm(0) // glc
537  .addImm(0) // slc
538  .addImm(0) // tfe
539  .addImm(0) // dlc
540  .cloneMemRefs(*MI);
541 
542  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
543  AMDGPU::OpName::vdata_in);
544  if (VDataIn)
545  NewMI.add(*VDataIn);
546  return true;
547 }
548 
549 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
550  unsigned LoadStoreOp,
551  int Index,
552  unsigned ValueReg,
553  bool IsKill,
554  unsigned ScratchRsrcReg,
555  unsigned ScratchOffsetReg,
556  int64_t InstOffset,
557  MachineMemOperand *MMO,
558  RegScavenger *RS) const {
559  MachineBasicBlock *MBB = MI->getParent();
560  MachineFunction *MF = MI->getParent()->getParent();
561  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
562  const SIInstrInfo *TII = ST.getInstrInfo();
563  const MachineFrameInfo &MFI = MF->getFrameInfo();
564 
565  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
566  const DebugLoc &DL = MI->getDebugLoc();
567  bool IsStore = Desc.mayStore();
568 
569  bool Scavenged = false;
570  unsigned SOffset = ScratchOffsetReg;
571 
572  const unsigned EltSize = 4;
573  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
574  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
575  unsigned Size = NumSubRegs * EltSize;
576  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
577  int64_t ScratchOffsetRegDelta = 0;
578 
579  unsigned Align = MFI.getObjectAlignment(Index);
580  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
581 
582  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
583 
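  // If the last element's offset would overflow the 12-bit MUBUF immediate,
  // materialize the wave-scaled byte offset in an SGPR: a scavenged one when
  // available, otherwise ScratchOffsetReg itself (undone after the loop).
  // e.g. with a 64-lane wave, a per-lane object offset of 16 bytes becomes
  // 16 * 64 = 1024 bytes added to the scratch wave offset.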
584  if (!isUInt<12>(Offset + Size - EltSize)) {
585  SOffset = AMDGPU::NoRegister;
586 
587  // We currently only support spilling VGPRs to EltSize boundaries, meaning
588  // we can simplify the adjustment of Offset here to just scale with
589  // WavefrontSize.
590  Offset *= ST.getWavefrontSize();
591 
592  // We don't have access to the register scavenger if this function is called
593  // during PEI::scavengeFrameVirtualRegs().
594  if (RS)
595  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
596 
597  if (SOffset == AMDGPU::NoRegister) {
598   // There are no free SGPRs, and we are in the process of spilling
599   // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
600   // on SI/CI, and on VI until we implement spilling using scalar
601   // stores), we have no way to free up an SGPR. Our solution here is to
602   // add the offset directly to the ScratchOffset register, and then
603   // subtract the offset after the spill to return ScratchOffset to its
604   // original value.
605  SOffset = ScratchOffsetReg;
606  ScratchOffsetRegDelta = Offset;
607  } else {
608  Scavenged = true;
609  }
610 
611  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
612  .addReg(ScratchOffsetReg)
613  .addImm(Offset);
614 
615  Offset = 0;
616  }
617 
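  // Emit one dword access per 32-bit subregister, advancing the immediate
  // offset by EltSize each iteration; the final access carries the kill flags.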
618  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
619  unsigned SubReg = NumSubRegs == 1 ?
620  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
621 
622  unsigned SOffsetRegState = 0;
623  unsigned SrcDstRegState = getDefRegState(!IsStore);
624  if (i + 1 == e) {
625  SOffsetRegState |= getKillRegState(Scavenged);
626  // The last implicit use carries the "Kill" flag.
627  SrcDstRegState |= getKillRegState(IsKill);
628  }
629 
630  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
631  MachineMemOperand *NewMMO
632  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
633  EltSize, MinAlign(Align, EltSize * i));
634 
635  auto MIB = BuildMI(*MBB, MI, DL, Desc)
636  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
637  .addReg(ScratchRsrcReg)
638  .addReg(SOffset, SOffsetRegState)
639  .addImm(Offset)
640  .addImm(0) // glc
641  .addImm(0) // slc
642  .addImm(0) // tfe
643  .addImm(0) // dlc
644  .addMemOperand(NewMMO);
645 
646  if (NumSubRegs > 1)
647  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
648  }
649 
650  if (ScratchOffsetRegDelta != 0) {
651  // Subtract the offset we added to the ScratchOffset register.
652  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
653  .addReg(ScratchOffsetReg)
654  .addImm(ScratchOffsetRegDelta);
655  }
656 }
657 
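// Pick the widest scalar buffer load/store that evenly divides the spilled
// register, returning {element size in bytes, opcode}.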
658 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
659  bool Store) {
660  if (SuperRegSize % 16 == 0) {
661  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
662  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
663  }
664 
665  if (SuperRegSize % 8 == 0) {
666  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
667  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
668  }
669 
670  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
671  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
672 }
673 
674 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
675                                int Index,
676  RegScavenger *RS,
677  bool OnlyToVGPR) const {
678  MachineBasicBlock *MBB = MI->getParent();
679  MachineFunction *MF = MBB->getParent();
680   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
681   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
682 
683   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
684     = MFI->getSGPRToVGPRSpills(Index);
685  bool SpillToVGPR = !VGPRSpills.empty();
686  if (OnlyToVGPR && !SpillToVGPR)
687  return false;
688 
689   MachineRegisterInfo &MRI = MF->getRegInfo();
690   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
691  const SIInstrInfo *TII = ST.getInstrInfo();
692 
693  unsigned SuperReg = MI->getOperand(0).getReg();
694  bool IsKill = MI->getOperand(0).isKill();
695  const DebugLoc &DL = MI->getDebugLoc();
696 
697  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
698 
699  bool SpillToSMEM = spillSGPRToSMEM();
700  if (SpillToSMEM && OnlyToVGPR)
701  return false;
702 
703  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
704  SuperReg != MFI->getFrameOffsetReg() &&
705  SuperReg != MFI->getScratchWaveOffsetReg()));
706 
707  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
708 
709  unsigned OffsetReg = AMDGPU::M0;
710  unsigned M0CopyReg = AMDGPU::NoRegister;
711 
712  if (SpillToSMEM) {
713  if (RS->isRegUsed(AMDGPU::M0)) {
714  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
715  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
716  .addReg(AMDGPU::M0);
717  }
718  }
719 
720  unsigned ScalarStoreOp;
721  unsigned EltSize = 4;
722  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
723  if (SpillToSMEM && isSGPRClass(RC)) {
724  // XXX - if private_element_size is larger than 4 it might be useful to be
725  // able to spill wider vmem spills.
726  std::tie(EltSize, ScalarStoreOp) =
727  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
728  }
729 
730  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
731  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
732 
733  // SubReg carries the "Kill" flag when SubReg == SuperReg.
734  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
735  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
736  unsigned SubReg = NumSubRegs == 1 ?
737  SuperReg : getSubReg(SuperReg, SplitParts[i]);
738 
739  if (SpillToSMEM) {
740  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
741 
742  // The allocated memory size is really the wavefront size * the frame
743  // index size. The widest register class is 64 bytes, so a 4-byte scratch
744  // allocation is enough to spill this in a single stack object.
745  //
746  // FIXME: Frame size/offsets are computed earlier than this, so the extra
747  // space is still unnecessarily allocated.
748 
749  unsigned Align = FrameInfo.getObjectAlignment(Index);
750  MachinePointerInfo PtrInfo
751  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
752  MachineMemOperand *MMO
753     = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
754                                EltSize, MinAlign(Align, EltSize * i));
755 
756  // SMEM instructions only support a single offset, so increment the wave
757  // offset.
758 
759  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
760  if (Offset != 0) {
761  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
762  .addReg(MFI->getFrameOffsetReg())
763  .addImm(Offset);
764  } else {
765  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
766  .addReg(MFI->getFrameOffsetReg());
767  }
768 
769  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
770  .addReg(SubReg, getKillRegState(IsKill)) // sdata
771  .addReg(MFI->getScratchRSrcReg()) // sbase
772  .addReg(OffsetReg, RegState::Kill) // soff
773  .addImm(0) // glc
774  .addImm(0) // dlc
775  .addMemOperand(MMO);
776 
777  continue;
778  }
779 
780  if (SpillToVGPR) {
781  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
782 
783  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
784  // only circumstance in which we say it is undefined is when it is the
785  // first spill to this VGPR in the first basic block.
786  bool VGPRDefined = true;
787  if (MBB == &MF->front())
788  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
789 
790  // Mark the "old value of vgpr" input undef only if this is the first sgpr
791  // spill to this specific vgpr in the first basic block.
792  BuildMI(*MBB, MI, DL,
793  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
794  Spill.VGPR)
795  .addReg(SubReg, getKillRegState(IsKill))
796  .addImm(Spill.Lane)
797  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
798 
799  // FIXME: Since this spills to another register instead of an actual
800  // frame index, we should delete the frame index when all references to
801  // it are fixed.
802  } else {
803  // XXX - Can the spill to VGPR fail for some subregisters but not others?
804  if (OnlyToVGPR)
805  return false;
806 
807  // Spill SGPR to a frame index.
808  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
809  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
811 
812       MachineInstrBuilder Mov
813         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
814  .addReg(SubReg, SubKillState);
815 
816 
817  // There could be undef components of a spilled super register.
818  // TODO: Can we detect this and skip the spill?
819  if (NumSubRegs > 1) {
820  // The last implicit use of the SuperReg carries the "Kill" flag.
821  unsigned SuperKillState = 0;
822  if (i + 1 == e)
823  SuperKillState |= getKillRegState(IsKill);
824  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
825  }
826 
827  unsigned Align = FrameInfo.getObjectAlignment(Index);
828  MachinePointerInfo PtrInfo
829  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
830  MachineMemOperand *MMO
831     = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
832                                EltSize, MinAlign(Align, EltSize * i));
833  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
834  .addReg(TmpReg, RegState::Kill) // src
835  .addFrameIndex(Index) // vaddr
836  .addReg(MFI->getScratchRSrcReg()) // srsrc
837  .addReg(MFI->getFrameOffsetReg()) // soffset
838  .addImm(i * 4) // offset
839  .addMemOperand(MMO);
840  }
841  }
842 
843  if (M0CopyReg != AMDGPU::NoRegister) {
844  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
845  .addReg(M0CopyReg, RegState::Kill);
846  }
847 
848  MI->eraseFromParent();
849  MFI->addToSpilledSGPRs(NumSubRegs);
850  return true;
851 }
852 
853 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
854                                  int Index,
855  RegScavenger *RS,
856  bool OnlyToVGPR) const {
857  MachineFunction *MF = MI->getParent()->getParent();
858   MachineRegisterInfo &MRI = MF->getRegInfo();
859   MachineBasicBlock *MBB = MI->getParent();
860   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
861 
862   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
863     = MFI->getSGPRToVGPRSpills(Index);
864  bool SpillToVGPR = !VGPRSpills.empty();
865  if (OnlyToVGPR && !SpillToVGPR)
866  return false;
867 
868  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
869  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
870  const SIInstrInfo *TII = ST.getInstrInfo();
871  const DebugLoc &DL = MI->getDebugLoc();
872 
873  unsigned SuperReg = MI->getOperand(0).getReg();
874  bool SpillToSMEM = spillSGPRToSMEM();
875  if (SpillToSMEM && OnlyToVGPR)
876  return false;
877 
878  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
879 
880  unsigned OffsetReg = AMDGPU::M0;
881  unsigned M0CopyReg = AMDGPU::NoRegister;
882 
883  if (SpillToSMEM) {
884  if (RS->isRegUsed(AMDGPU::M0)) {
885  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
886  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
887  .addReg(AMDGPU::M0);
888  }
889  }
890 
891  unsigned EltSize = 4;
892  unsigned ScalarLoadOp;
893 
894  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
895  if (SpillToSMEM && isSGPRClass(RC)) {
896  // XXX - if private_element_size is larger than 4 it might be useful to be
897  // able to spill wider vmem spills.
898  std::tie(EltSize, ScalarLoadOp) =
899  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
900  }
901 
902  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
903  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
904 
905  // SubReg carries the "Kill" flag when SubReg == SuperReg.
906  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
907 
908  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
909  unsigned SubReg = NumSubRegs == 1 ?
910  SuperReg : getSubReg(SuperReg, SplitParts[i]);
911 
912  if (SpillToSMEM) {
913  // FIXME: Size may be > 4 but extra bytes wasted.
914  unsigned Align = FrameInfo.getObjectAlignment(Index);
915  MachinePointerInfo PtrInfo
916  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
917  MachineMemOperand *MMO
918     = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
919                                EltSize, MinAlign(Align, EltSize * i));
920 
921  // Add i * 4 offset
922  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
923  if (Offset != 0) {
924  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
925  .addReg(MFI->getFrameOffsetReg())
926  .addImm(Offset);
927  } else {
928  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
929  .addReg(MFI->getFrameOffsetReg());
930  }
931 
932  auto MIB =
933  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
934  .addReg(MFI->getScratchRSrcReg()) // sbase
935  .addReg(OffsetReg, RegState::Kill) // soff
936  .addImm(0) // glc
937  .addImm(0) // dlc
938  .addMemOperand(MMO);
939 
940  if (NumSubRegs > 1 && i == 0)
941  MIB.addReg(SuperReg, RegState::ImplicitDefine);
942 
943  continue;
944  }
945 
946  if (SpillToVGPR) {
947  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
948  auto MIB =
949  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
950  SubReg)
951  .addReg(Spill.VGPR)
952  .addImm(Spill.Lane);
953 
954  if (NumSubRegs > 1 && i == 0)
955  MIB.addReg(SuperReg, RegState::ImplicitDefine);
956  } else {
957  if (OnlyToVGPR)
958  return false;
959 
960  // Restore SGPR from a stack slot.
961  // FIXME: We should use S_LOAD_DWORD here for VI.
962  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
963  unsigned Align = FrameInfo.getObjectAlignment(Index);
964 
965  MachinePointerInfo PtrInfo
966  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
967 
968  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
969  MachineMemOperand::MOLoad, EltSize,
970  MinAlign(Align, EltSize * i));
971 
972  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
973  .addFrameIndex(Index) // vaddr
974  .addReg(MFI->getScratchRSrcReg()) // srsrc
975  .addReg(MFI->getFrameOffsetReg()) // soffset
976  .addImm(i * 4) // offset
977  .addMemOperand(MMO);
978 
979  auto MIB =
980  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
981  .addReg(TmpReg, RegState::Kill);
982 
983  if (NumSubRegs > 1)
984  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
985  }
986  }
987 
988  if (M0CopyReg != AMDGPU::NoRegister) {
989  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
990  .addReg(M0CopyReg, RegState::Kill);
991  }
992 
993  MI->eraseFromParent();
994  return true;
995 }
996 
997 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
998 /// a VGPR and the stack slot can be safely eliminated when all other users are
999 /// handled.
1000 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1001   MachineBasicBlock::iterator MI,
1002   int FI,
1003  RegScavenger *RS) const {
1004  switch (MI->getOpcode()) {
1005  case AMDGPU::SI_SPILL_S512_SAVE:
1006  case AMDGPU::SI_SPILL_S256_SAVE:
1007  case AMDGPU::SI_SPILL_S160_SAVE:
1008  case AMDGPU::SI_SPILL_S128_SAVE:
1009  case AMDGPU::SI_SPILL_S96_SAVE:
1010  case AMDGPU::SI_SPILL_S64_SAVE:
1011  case AMDGPU::SI_SPILL_S32_SAVE:
1012  return spillSGPR(MI, FI, RS, true);
1013  case AMDGPU::SI_SPILL_S512_RESTORE:
1014  case AMDGPU::SI_SPILL_S256_RESTORE:
1015  case AMDGPU::SI_SPILL_S160_RESTORE:
1016  case AMDGPU::SI_SPILL_S128_RESTORE:
1017  case AMDGPU::SI_SPILL_S96_RESTORE:
1018  case AMDGPU::SI_SPILL_S64_RESTORE:
1019  case AMDGPU::SI_SPILL_S32_RESTORE:
1020  return restoreSGPR(MI, FI, RS, true);
1021  default:
1022  llvm_unreachable("not an SGPR spill instruction");
1023  }
1024 }
1025 
1026 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1027                                         int SPAdj, unsigned FIOperandNum,
1028  RegScavenger *RS) const {
1029  MachineFunction *MF = MI->getParent()->getParent();
1030   MachineRegisterInfo &MRI = MF->getRegInfo();
1031   MachineBasicBlock *MBB = MI->getParent();
1032   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1033   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1034  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1035  const SIInstrInfo *TII = ST.getInstrInfo();
1036  DebugLoc DL = MI->getDebugLoc();
1037 
1038  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1039  int Index = MI->getOperand(FIOperandNum).getIndex();
1040 
1041  switch (MI->getOpcode()) {
1042  // SGPR register spill
1043  case AMDGPU::SI_SPILL_S512_SAVE:
1044  case AMDGPU::SI_SPILL_S256_SAVE:
1045  case AMDGPU::SI_SPILL_S160_SAVE:
1046  case AMDGPU::SI_SPILL_S128_SAVE:
1047  case AMDGPU::SI_SPILL_S96_SAVE:
1048  case AMDGPU::SI_SPILL_S64_SAVE:
1049  case AMDGPU::SI_SPILL_S32_SAVE: {
1050  spillSGPR(MI, Index, RS);
1051  break;
1052  }
1053 
1054  // SGPR register restore
1055  case AMDGPU::SI_SPILL_S512_RESTORE:
1056  case AMDGPU::SI_SPILL_S256_RESTORE:
1057  case AMDGPU::SI_SPILL_S160_RESTORE:
1058  case AMDGPU::SI_SPILL_S128_RESTORE:
1059  case AMDGPU::SI_SPILL_S96_RESTORE:
1060  case AMDGPU::SI_SPILL_S64_RESTORE:
1061  case AMDGPU::SI_SPILL_S32_RESTORE: {
1062  restoreSGPR(MI, Index, RS);
1063  break;
1064  }
1065 
1066  // VGPR register spill
1067  case AMDGPU::SI_SPILL_V512_SAVE:
1068  case AMDGPU::SI_SPILL_V256_SAVE:
1069  case AMDGPU::SI_SPILL_V160_SAVE:
1070  case AMDGPU::SI_SPILL_V128_SAVE:
1071  case AMDGPU::SI_SPILL_V96_SAVE:
1072  case AMDGPU::SI_SPILL_V64_SAVE:
1073  case AMDGPU::SI_SPILL_V32_SAVE: {
1074  const MachineOperand *VData = TII->getNamedOperand(*MI,
1075  AMDGPU::OpName::vdata);
1076  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1077  Index,
1078  VData->getReg(), VData->isKill(),
1079  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1080  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1081  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1082  *MI->memoperands_begin(),
1083  RS);
1084  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1085  MI->eraseFromParent();
1086  break;
1087  }
1088  case AMDGPU::SI_SPILL_V32_RESTORE:
1089  case AMDGPU::SI_SPILL_V64_RESTORE:
1090  case AMDGPU::SI_SPILL_V96_RESTORE:
1091  case AMDGPU::SI_SPILL_V128_RESTORE:
1092  case AMDGPU::SI_SPILL_V160_RESTORE:
1093  case AMDGPU::SI_SPILL_V256_RESTORE:
1094  case AMDGPU::SI_SPILL_V512_RESTORE: {
1095  const MachineOperand *VData = TII->getNamedOperand(*MI,
1096  AMDGPU::OpName::vdata);
1097 
1098  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1099  Index,
1100  VData->getReg(), VData->isKill(),
1101  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1102  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1103  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1104  *MI->memoperands_begin(),
1105  RS);
1106  MI->eraseFromParent();
1107  break;
1108  }
1109 
1110  default: {
1111  const DebugLoc &DL = MI->getDebugLoc();
1112  bool IsMUBUF = TII->isMUBUF(*MI);
1113 
1114  if (!IsMUBUF &&
1115  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1116  // Convert to an absolute stack address by finding the offset from the
1117  // scratch wave base and scaling by the wave size.
1118  //
1119  // In an entry function/kernel the stack address is already the
1120  // absolute address relative to the scratch wave offset.
1121 
1122  unsigned DiffReg
1123  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1124 
1125  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1126  unsigned ResultReg = IsCopy ?
1127  MI->getOperand(0).getReg() :
1128  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1129 
1130  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1131  .addReg(MFI->getFrameOffsetReg())
1132  .addReg(MFI->getScratchWaveOffsetReg());
1133 
1134  int64_t Offset = FrameInfo.getObjectOffset(Index);
1135  if (Offset == 0) {
1136  // XXX - This never happens because of emergency scavenging slot at 0?
1137  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1138  .addImm(Log2_32(ST.getWavefrontSize()))
1139  .addReg(DiffReg);
1140  } else {
1141  unsigned ScaledReg
1142  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1143 
1144  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1145  .addImm(Log2_32(ST.getWavefrontSize()))
1146  .addReg(DiffReg, RegState::Kill);
1147 
1148  // TODO: Fold if use instruction is another add of a constant.
1149         if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1150           TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1151  .addImm(Offset)
1152  .addReg(ScaledReg, RegState::Kill)
1153  .addImm(0); // clamp bit
1154  } else {
1155  unsigned ConstOffsetReg
1156  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1157 
1158  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1159  .addImm(Offset);
1160  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1161  .addReg(ConstOffsetReg, RegState::Kill)
1162  .addReg(ScaledReg, RegState::Kill)
1163  .addImm(0); // clamp bit
1164  }
1165  }
1166 
1167  // Don't introduce an extra copy if we're just materializing in a mov.
1168  if (IsCopy)
1169  MI->eraseFromParent();
1170  else
1171  FIOp.ChangeToRegister(ResultReg, false, false, true);
1172  return;
1173  }
1174 
1175  if (IsMUBUF) {
1176  // Disable offen so we don't need a 0 vgpr base.
1177  assert(static_cast<int>(FIOperandNum) ==
1178  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1179  AMDGPU::OpName::vaddr));
1180 
1181  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1182  == MFI->getFrameOffsetReg());
1183 
1184  int64_t Offset = FrameInfo.getObjectOffset(Index);
1185  int64_t OldImm
1186  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1187  int64_t NewOffset = OldImm + Offset;
1188 
1189  if (isUInt<12>(NewOffset) &&
1190  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1191  MI->eraseFromParent();
1192  return;
1193  }
1194  }
1195 
1196  // If the offset is simply too big, don't convert to a scratch wave offset
1197  // relative index.
1198 
1199  int64_t Offset = FrameInfo.getObjectOffset(Index);
1200  FIOp.ChangeToImmediate(Offset);
1201  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1202  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1203  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1204  .addImm(Offset);
1205  FIOp.ChangeToRegister(TmpReg, false, false, true);
1206  }
1207  }
1208  }
1209 }
1210 
1211 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1212   #define AMDGPU_REG_ASM_NAMES
1213  #include "AMDGPURegAsmNames.inc.cpp"
1214 
1215  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1216  if (Reg >= BeginReg && Reg <= EndReg) { \
1217  unsigned Index = Reg - BeginReg; \
1218  assert(Index < array_lengthof(RegTable)); \
1219  return RegTable[Index]; \
1220  }
1221 
1222  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1223  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR105, SGPR32RegNames);
1224  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1225  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR104_SGPR105, SGPR64RegNames);
1226  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1227  VGPR96RegNames);
1228 
1229  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1230  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1231  VGPR128RegNames);
1232  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1233  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1234  SGPR128RegNames);
1235 
1236  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1237  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1238  VGPR256RegNames);
1239 
1240  REG_RANGE(
1241  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1242  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1243  VGPR512RegNames);
1244 
1245  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1246  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1247  SGPR256RegNames);
1248 
1249  REG_RANGE(
1250  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1251  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1252  SGPR512RegNames
1253  );
1254 
1255 #undef REG_RANGE
1256 
1257  // FIXME: Rename flat_scr so we don't need to special case this.
1258  switch (Reg) {
1259  case AMDGPU::FLAT_SCR:
1260  return "flat_scratch";
1261  case AMDGPU::FLAT_SCR_LO:
1262  return "flat_scratch_lo";
1263  case AMDGPU::FLAT_SCR_HI:
1264  return "flat_scratch_hi";
1265  default:
1266  // For the special named registers the default is fine.
1267     return AMDGPURegisterInfo::getRegAsmName(Reg);
1268   }
1269 }
1270 
1271 // FIXME: This is very slow. It might be worth creating a map from physreg to
1272 // register class.
1273 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1274   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1275 
1276  static const TargetRegisterClass *const BaseClasses[] = {
1277  &AMDGPU::VGPR_32RegClass,
1278  &AMDGPU::SReg_32RegClass,
1279  &AMDGPU::VReg_64RegClass,
1280  &AMDGPU::SReg_64RegClass,
1281  &AMDGPU::VReg_96RegClass,
1282  &AMDGPU::SReg_96RegClass,
1283  &AMDGPU::VReg_128RegClass,
1284  &AMDGPU::SReg_128RegClass,
1285  &AMDGPU::VReg_160RegClass,
1286  &AMDGPU::SReg_160RegClass,
1287  &AMDGPU::VReg_256RegClass,
1288  &AMDGPU::SReg_256RegClass,
1289  &AMDGPU::VReg_512RegClass,
1290  &AMDGPU::SReg_512RegClass,
1291  &AMDGPU::SCC_CLASSRegClass,
1292  &AMDGPU::Pseudo_SReg_32RegClass,
1293  &AMDGPU::Pseudo_SReg_128RegClass,
1294  };
1295 
1296  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1297  if (BaseClass->contains(Reg)) {
1298  return BaseClass;
1299  }
1300  }
1301  return nullptr;
1302 }
1303 
1304 // TODO: It might be helpful to have some target specific flags in
1305 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1306 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1307   unsigned Size = getRegSizeInBits(*RC);
1308  if (Size < 32)
1309  return false;
1310  switch (Size) {
1311  case 32:
1312  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1313  case 64:
1314  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1315  case 96:
1316  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1317  case 128:
1318  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1319  case 160:
1320  return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1321  case 256:
1322  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1323  case 512:
1324  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1325  default:
1326  llvm_unreachable("Invalid register class size");
1327  }
1328 }
1329 
1330 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1331                                          const TargetRegisterClass *SRC) const {
1332  switch (getRegSizeInBits(*SRC)) {
1333  case 32:
1334  return &AMDGPU::VGPR_32RegClass;
1335  case 64:
1336  return &AMDGPU::VReg_64RegClass;
1337  case 96:
1338  return &AMDGPU::VReg_96RegClass;
1339  case 128:
1340  return &AMDGPU::VReg_128RegClass;
1341  case 160:
1342  return &AMDGPU::VReg_160RegClass;
1343  case 256:
1344  return &AMDGPU::VReg_256RegClass;
1345  case 512:
1346  return &AMDGPU::VReg_512RegClass;
1347  default:
1348  llvm_unreachable("Invalid register class size");
1349  }
1350 }
1351 
1352 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1353                                          const TargetRegisterClass *VRC) const {
1354  switch (getRegSizeInBits(*VRC)) {
1355  case 32:
1356  return &AMDGPU::SGPR_32RegClass;
1357  case 64:
1358  return &AMDGPU::SReg_64RegClass;
1359  case 96:
1360  return &AMDGPU::SReg_96RegClass;
1361  case 128:
1362  return &AMDGPU::SReg_128RegClass;
1363  case 160:
1364  return &AMDGPU::SReg_160RegClass;
1365  case 256:
1366  return &AMDGPU::SReg_256RegClass;
1367  case 512:
1368  return &AMDGPU::SReg_512RegClass;
1369  default:
1370  llvm_unreachable("Invalid register class size");
1371  }
1372 }
1373 
1374 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1375   const TargetRegisterClass *RC, unsigned SubIdx) const {
1376  if (SubIdx == AMDGPU::NoSubRegister)
1377  return RC;
1378 
1379  // We can assume that each lane corresponds to one 32-bit register.
1380  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1381  if (isSGPRClass(RC)) {
1382  switch (Count) {
1383  case 1:
1384  return &AMDGPU::SGPR_32RegClass;
1385  case 2:
1386  return &AMDGPU::SReg_64RegClass;
1387  case 3:
1388  return &AMDGPU::SReg_96RegClass;
1389  case 4:
1390  return &AMDGPU::SReg_128RegClass;
1391  case 5:
1392  return &AMDGPU::SReg_160RegClass;
1393  case 8:
1394  return &AMDGPU::SReg_256RegClass;
1395  case 16: /* fall-through */
1396  default:
1397  llvm_unreachable("Invalid sub-register class size");
1398  }
1399  } else {
1400  switch (Count) {
1401  case 1:
1402  return &AMDGPU::VGPR_32RegClass;
1403  case 2:
1404  return &AMDGPU::VReg_64RegClass;
1405  case 3:
1406  return &AMDGPU::VReg_96RegClass;
1407  case 4:
1408  return &AMDGPU::VReg_128RegClass;
1409  case 5:
1410  return &AMDGPU::VReg_160RegClass;
1411  case 8:
1412  return &AMDGPU::VReg_256RegClass;
1413  case 16: /* fall-through */
1414  default:
1415  llvm_unreachable("Invalid sub-register class size");
1416  }
1417  }
1418 }
1419 
1420 bool SIRegisterInfo::shouldRewriteCopySrc(
1421   const TargetRegisterClass *DefRC,
1422  unsigned DefSubReg,
1423  const TargetRegisterClass *SrcRC,
1424  unsigned SrcSubReg) const {
1425  // We want to prefer the smallest register class possible, so we don't want to
1426  // stop and rewrite on anything that looks like a subregister
1427  // extract. Operations mostly don't care about the super register class, so we
1428  // only want to stop on the most basic of copies between the same register
1429  // class.
1430  //
1431  // e.g. if we have something like
1432  // %0 = ...
1433  // %1 = ...
1434  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1435  // %3 = COPY %2, sub0
1436  //
1437  // We want to look through the COPY to find:
1438  // => %3 = COPY %0
1439 
1440  // Plain copy.
1441  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1442 }
1443 
1444 /// Returns a register that is not used at any point in the function.
1445 /// If all registers are used, then this function will return
1446 /// AMDGPU::NoRegister.
1447 unsigned
1448 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1449                                    const TargetRegisterClass *RC,
1450  const MachineFunction &MF) const {
1451 
1452  for (unsigned Reg : *RC)
1453  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1454  return Reg;
1455  return AMDGPU::NoRegister;
1456 }
1457 
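// Return the subregister indices that split RC into EltSize-byte (4, 8, or 16)
// pieces for spilling; an empty list means the register is handled as a single
// element.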
1458 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1459                                                    unsigned EltSize) const {
1460  if (EltSize == 4) {
1461  static const int16_t Sub0_15[] = {
1462  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1463  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1464  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1465  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1466  };
1467 
1468  static const int16_t Sub0_7[] = {
1469  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1470  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1471  };
1472 
1473  static const int16_t Sub0_4[] = {
1474  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1475  };
1476 
1477  static const int16_t Sub0_3[] = {
1478  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1479  };
1480 
1481  static const int16_t Sub0_2[] = {
1482  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1483  };
1484 
1485  static const int16_t Sub0_1[] = {
1486  AMDGPU::sub0, AMDGPU::sub1,
1487  };
1488 
1489  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1490  case 32:
1491  return {};
1492  case 64:
1493  return makeArrayRef(Sub0_1);
1494  case 96:
1495  return makeArrayRef(Sub0_2);
1496  case 128:
1497  return makeArrayRef(Sub0_3);
1498  case 160:
1499  return makeArrayRef(Sub0_4);
1500  case 256:
1501  return makeArrayRef(Sub0_7);
1502  case 512:
1503  return makeArrayRef(Sub0_15);
1504  default:
1505  llvm_unreachable("unhandled register size");
1506  }
1507  }
1508 
1509  if (EltSize == 8) {
1510  static const int16_t Sub0_15_64[] = {
1511  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1512  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1513  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1514  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1515  };
1516 
1517  static const int16_t Sub0_7_64[] = {
1518  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1519  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1520  };
1521 
1522 
1523  static const int16_t Sub0_3_64[] = {
1524  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1525  };
1526 
1527  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1528  case 64:
1529  return {};
1530  case 128:
1531  return makeArrayRef(Sub0_3_64);
1532  case 256:
1533  return makeArrayRef(Sub0_7_64);
1534  case 512:
1535  return makeArrayRef(Sub0_15_64);
1536  default:
1537  llvm_unreachable("unhandled register size");
1538  }
1539  }
1540 
1541  assert(EltSize == 16 && "unhandled register spill split size");
1542 
1543  static const int16_t Sub0_15_128[] = {
1544  AMDGPU::sub0_sub1_sub2_sub3,
1545  AMDGPU::sub4_sub5_sub6_sub7,
1546  AMDGPU::sub8_sub9_sub10_sub11,
1547  AMDGPU::sub12_sub13_sub14_sub15
1548  };
1549 
1550  static const int16_t Sub0_7_128[] = {
1551  AMDGPU::sub0_sub1_sub2_sub3,
1552  AMDGPU::sub4_sub5_sub6_sub7
1553  };
1554 
1555  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1556  case 128:
1557  return {};
1558  case 256:
1559  return makeArrayRef(Sub0_7_128);
1560  case 512:
1561  return makeArrayRef(Sub0_15_128);
1562  default:
1563  llvm_unreachable("unhandled register size");
1564  }
1565 }
1566 
1567 const TargetRegisterClass*
1568 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1569                                   unsigned Reg) const {
1570   if (TargetRegisterInfo::isVirtualRegister(Reg))
1571     return MRI.getRegClass(Reg);
1572 
1573  return getPhysRegClass(Reg);
1574 }
1575 
1576 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1577                             unsigned Reg) const {
1578  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1579  assert(RC && "Register class for the reg not found");
1580  return hasVGPRs(RC);
1581 }
1582 
1583 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1584                                     const TargetRegisterClass *SrcRC,
1585  unsigned SubReg,
1586  const TargetRegisterClass *DstRC,
1587  unsigned DstSubReg,
1588  const TargetRegisterClass *NewRC,
1589  LiveIntervals &LIS) const {
1590  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1591  unsigned DstSize = getRegSizeInBits(*DstRC);
1592  unsigned NewSize = getRegSizeInBits(*NewRC);
1593 
1594  // Do not increase size of registers beyond a dword; we would need to allocate
1595  // adjacent registers and constrain regalloc more than needed.
1596 
1597  // Always allow dword coalescing.
1598  if (SrcSize <= 32 || DstSize <= 32)
1599  return true;
1600 
1601  return NewSize <= DstSize || NewSize <= SrcSize;
1602 }
1603 
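// For the 32-bit VGPR/SGPR classes the pressure limit is occupancy-driven: the
// register budget at the occupancy permitted by the kernel's LDS usage, capped
// by the function's own maximum.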
1604 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1605                                              MachineFunction &MF) const {
1606 
1607  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1608   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1609 
1610  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1611  MF.getFunction());
1612  switch (RC->getID()) {
1613  default:
1614  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1615  case AMDGPU::VGPR_32RegClassID:
1616  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1617  case AMDGPU::SGPR_32RegClassID:
1618  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1619  }
1620 }
1621 
1622 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1623                                                 unsigned Idx) const {
1624  if (Idx == getVGPRPressureSet())
1625  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1626  const_cast<MachineFunction &>(MF));
1627 
1628  if (Idx == getSGPRPressureSet())
1629  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1630  const_cast<MachineFunction &>(MF));
1631 
1632  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1633 }
1634 
1635 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1636  static const int Empty[] = { -1 };
1637 
1638  if (hasRegUnit(AMDGPU::M0, RegUnit))
1639  return Empty;
1640  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1641 }
1642 
1643 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1644   // Not a callee saved register.
1645  return AMDGPU::SGPR30_SGPR31;
1646 }
1647 
1648 const TargetRegisterClass *
1649 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1650                                          const MachineRegisterInfo &MRI) const {
1651  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1652  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1653  if (!RB)
1654  return nullptr;
1655 
1656  Size = PowerOf2Ceil(Size);
1657  switch (Size) {
1658  case 32:
1659  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1660  &AMDGPU::SReg_32_XM0RegClass;
1661  case 64:
1662  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1663  &AMDGPU::SReg_64_XEXECRegClass;
1664  case 96:
1665  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1666  &AMDGPU::SReg_96RegClass;
1667  case 128:
1668  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1669  &AMDGPU::SReg_128RegClass;
1670  case 160:
1671  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1672  &AMDGPU::SReg_160RegClass;
1673  case 256:
1674  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1675  &AMDGPU::SReg_256RegClass;
1676  case 512:
1677  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1678  &AMDGPU::SReg_512RegClass;
1679  default:
1680  llvm_unreachable("not implemented");
1681  }
1682 }
1683 
1684 // Find reaching register definition
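// For a virtual register, walk its live interval (or the matching subrange) to
// the value live at Use; for a physical register, take the latest def across
// all register units that dominates Use.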
1685 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1686                                               MachineInstr &Use,
1687                                               MachineRegisterInfo &MRI,
1688                                               LiveIntervals *LIS) const {
1689  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1690  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1691  SlotIndex DefIdx;
1692 
1693   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1694     if (!LIS->hasInterval(Reg))
1695  return nullptr;
1696  LiveInterval &LI = LIS->getInterval(Reg);
1697  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1698  : MRI.getMaxLaneMaskForVReg(Reg);
1699  VNInfo *V = nullptr;
1700  if (LI.hasSubRanges()) {
1701  for (auto &S : LI.subranges()) {
1702  if ((S.LaneMask & SubLanes) == SubLanes) {
1703  V = S.getVNInfoAt(UseIdx);
1704  break;
1705  }
1706  }
1707  } else {
1708  V = LI.getVNInfoAt(UseIdx);
1709  }
1710  if (!V)
1711  return nullptr;
1712  DefIdx = V->def;
1713  } else {
1714  // Find last def.
1715  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1716  LiveRange &LR = LIS->getRegUnit(*Units);
1717  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1718  if (!DefIdx.isValid() ||
1719  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1720  LIS->getInstructionFromIndex(V->def)))
1721  DefIdx = V->def;
1722  } else {
1723  return nullptr;
1724  }
1725  }
1726  }
1727 
1728  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
1729 
1730  if (!Def || !MDT.dominates(Def, &Use))
1731  return nullptr;
1732 
1733  assert(Def->modifiesRegister(Reg, this));
1734 
1735  return Def;
1736 }
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:151
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount)
A debug info location.
Definition: DebugLoc.h:33
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:458
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override
static unsigned getNumSubRegsForSpillOp(unsigned Op)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:156
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:717
A description of a memory reference used in the backend.
MachineInstr * findReachingDef(unsigned Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:722
unsigned SubReg
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:750
unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:430
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isSGPRPressureSet(unsigned SetID) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if this.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
#define REG_RANGE(BeginReg, EndReg, RegTable)
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:408
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
unsigned getReturnAddressReg(const MachineFunction &MF) const
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getSGPRPressureSet() const
const RegisterBank * getRegBankOrNull(unsigned Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
const MachineBasicBlock & front() const
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
Definition: FileCheck.cpp:124
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
LiveInterval & getInterval(unsigned Reg)
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:404
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool hasInv2PiInlineImm() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
void reserveRegisterTuples(BitVector &, unsigned Reg) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value,.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
uint32_t Size
Definition: Profile.cpp:46
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available and do the appropriate bookkeeping.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the &#39;base&#39; register class for this register.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:658
bool hasCalls() const
Return true if the current function has any function calls.