LLVM 7.0.0svn
SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/LLVMContext.h"
24 
25 using namespace llvm;
26 
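// Returns true if the -1 terminated list PSets contains PSetID.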
27 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
28  for (unsigned i = 0; PSets[i] != -1; ++i) {
29  if (PSets[i] == (int)PSetID)
30  return true;
31  }
32  return false;
33 }
34 
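// Mark PSetID in PressureSets if any register unit of Reg belongs to that pressure set.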
35 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
36  BitVector &PressureSets) const {
37  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
38  const int *PSets = getRegUnitPressureSets(*U);
39  if (hasPressureSet(PSets, PSetID)) {
40  PressureSets.set(PSetID);
41  break;
42  }
43  }
44 }
45 
46 static cl::opt<bool> EnableSpillSGPRToSMEM(
47  "amdgpu-spill-sgpr-to-smem",
48  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
49  cl::init(false));
50 
51 static cl::opt<bool> EnableSpillSGPRToVGPR(
52  "amdgpu-spill-sgpr-to-vgpr",
53  cl::desc("Enable spilling VGPRs to SGPRs"),
54  cl::ReallyHidden,
55  cl::init(true));
56 
57 SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
58  AMDGPURegisterInfo(),
59  SGPRPressureSets(getNumRegPressureSets()),
60  VGPRPressureSets(getNumRegPressureSets()),
61  SpillSGPRToVGPR(false),
62  SpillSGPRToSMEM(false) {
63  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
64  SpillSGPRToSMEM = true;
65  else if (EnableSpillSGPRToVGPR)
66  SpillSGPRToVGPR = true;
67 
68  unsigned NumRegPressureSets = getNumRegPressureSets();
69 
70  SGPRSetID = NumRegPressureSets;
71  VGPRSetID = NumRegPressureSets;
72 
73  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
74  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
75  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
76  }
77 
78  // Determine the number of reg units for each pressure set.
79  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
80  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
81  const int *PSets = getRegUnitPressureSets(i);
82  for (unsigned j = 0; PSets[j] != -1; ++j) {
83  ++PressureSetRegUnits[PSets[j]];
84  }
85  }
86 
87  unsigned VGPRMax = 0, SGPRMax = 0;
88  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
89  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
90  VGPRSetID = i;
91  VGPRMax = PressureSetRegUnits[i];
92  continue;
93  }
94  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
95  SGPRSetID = i;
96  SGPRMax = PressureSetRegUnits[i];
97  }
98  }
99 
100  assert(SGPRSetID < NumRegPressureSets &&
101  VGPRSetID < NumRegPressureSets);
102 }
103 
104 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
105  const MachineFunction &MF) const {
106 
107  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
108  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
109  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
110  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
111 }
112 
113 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
114  unsigned Reg;
115 
116  // Try to place it in a hole after PrivateSegmentBufferReg.
117  if (RegCount & 3) {
118  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
119  // alignment constraints, so we have a hole where we can put the wave offset.
120  Reg = RegCount - 1;
121  } else {
122  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
123  // wave offset before it.
124  Reg = RegCount - 5;
125  }
126 
127  return Reg;
128 }
129 
130 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
131  const MachineFunction &MF) const {
132  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
133  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
134  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
135 }
136 
137 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
138  const MachineFunction &MF) const {
139  return AMDGPU::SGPR32;
140 }
141 
142 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
143  BitVector Reserved(getNumRegs());
144 
145  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
146  // this seems likely to result in bugs, so I'm marking them as reserved.
147  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
148  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
149 
150  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
151  reserveRegisterTuples(Reserved, AMDGPU::M0);
152 
153  // Reserve the memory aperture registers.
154  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
155  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
156  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
157  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
158 
159  // Reserve xnack_mask registers - support is not implemented in Codegen.
160  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
161 
162  // Reserve Trap Handler registers - support is not implemented in Codegen.
163  reserveRegisterTuples(Reserved, AMDGPU::TBA);
164  reserveRegisterTuples(Reserved, AMDGPU::TMA);
165  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
166  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
167  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
168  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
169  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
170  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
171  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
172  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
173 
174  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
175 
176  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
177  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
178  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
179  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
180  reserveRegisterTuples(Reserved, Reg);
181  }
182 
183  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
184  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
185  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
186  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
187  reserveRegisterTuples(Reserved, Reg);
188  }
189 
190  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
191 
192  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
193  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
194  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
195  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
196  }
197 
198  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
199  if (ScratchRSrcReg != AMDGPU::NoRegister) {
200  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
201  // to spill.
202  // TODO: May need to reserve a VGPR if doing LDS spilling.
203  reserveRegisterTuples(Reserved, ScratchRSrcReg);
204  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
205  }
206 
207  // We have to assume the SP is needed in case there are calls in the function,
208  // which is detected after the function is lowered. If we aren't really going
209  // to need SP, don't bother reserving it.
210  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
211 
212  if (StackPtrReg != AMDGPU::NoRegister) {
213  reserveRegisterTuples(Reserved, StackPtrReg);
214  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
215  }
216 
217  unsigned FrameReg = MFI->getFrameOffsetReg();
218  if (FrameReg != AMDGPU::NoRegister) {
219  reserveRegisterTuples(Reserved, FrameReg);
220  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
221  }
222 
223  return Reserved;
224 }
225 
226 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
227  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
228  if (Info->isEntryFunction()) {
229  const MachineFrameInfo &MFI = Fn.getFrameInfo();
230  return MFI.hasStackObjects() || MFI.hasCalls();
231  }
232 
233  // May need scavenger for dealing with callee saved registers.
234  return true;
235 }
236 
237 bool SIRegisterInfo::requiresFrameIndexScavenging(
238  const MachineFunction &MF) const {
239  const MachineFrameInfo &MFI = MF.getFrameInfo();
240  if (MFI.hasStackObjects())
241  return true;
242 
243  // May need to deal with callee saved registers.
244  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
245  return !Info->isEntryFunction();
246 }
247 
248 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
249  const MachineFunction &MF) const {
250  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
251  // create a virtual register for it during frame index elimination, so the
252  // scavenger is directly needed.
253  return MF.getFrameInfo().hasStackObjects() &&
254  MF.getSubtarget<SISubtarget>().hasScalarStores() &&
255  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
256 }
257 
258 bool SIRegisterInfo::requiresVirtualBaseRegisters(
259  const MachineFunction &) const {
260  // There are no special dedicated stack or frame pointers.
261  return true;
262 }
263 
264 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
265  // This helps catch bugs as verifier errors.
266  return true;
267 }
268 
269 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
270  assert(SIInstrInfo::isMUBUF(*MI));
271 
272  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
273  AMDGPU::OpName::offset);
274  return MI->getOperand(OffIdx).getImm();
275 }
276 
277 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
278  int Idx) const {
279  if (!SIInstrInfo::isMUBUF(*MI))
280  return 0;
281 
282  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
283  AMDGPU::OpName::vaddr) &&
284  "Should never see frame index on non-address operand");
285 
286  return getMUBUFInstrOffset(MI);
287 }
288 
289 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
290  if (!MI->mayLoadOrStore())
291  return false;
292 
293  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
294 
295  return !isUInt<12>(FullOffset);
296 }
297 
298 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
299  unsigned BaseReg,
300  int FrameIdx,
301  int64_t Offset) const {
302  MachineBasicBlock::iterator Ins = MBB->begin();
303  DebugLoc DL; // Defaults to "unknown"
304 
305  if (Ins != MBB->end())
306  DL = Ins->getDebugLoc();
307 
308  MachineFunction *MF = MBB->getParent();
309  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
310  const SIInstrInfo *TII = Subtarget.getInstrInfo();
311 
312  if (Offset == 0) {
313  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
314  .addFrameIndex(FrameIdx);
315  return;
316  }
317 
318  MachineRegisterInfo &MRI = MF->getRegInfo();
319  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
320 
321  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
322 
323  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
324  .addImm(Offset);
325  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
326  .addFrameIndex(FrameIdx);
327 
328  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
329  .addReg(OffsetReg, RegState::Kill)
330  .addReg(FIReg);
331 }
332 
333 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
334  int64_t Offset) const {
335 
336  MachineBasicBlock *MBB = MI.getParent();
337  MachineFunction *MF = MBB->getParent();
338  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
339  const SIInstrInfo *TII = Subtarget.getInstrInfo();
340 
341 #ifndef NDEBUG
342  // FIXME: Is it possible to be storing a frame index to itself?
343  bool SeenFI = false;
344  for (const MachineOperand &MO: MI.operands()) {
345  if (MO.isFI()) {
346  if (SeenFI)
347  llvm_unreachable("should not see multiple frame indices");
348 
349  SeenFI = true;
350  }
351  }
352 #endif
353 
354  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
355  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
356  assert(TII->isMUBUF(MI));
357  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
358  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
359  "should only be seeing frame offset relative FrameIndex");
360 
361 
362  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
363  int64_t NewOffset = OffsetOp->getImm() + Offset;
364  assert(isUInt<12>(NewOffset) && "offset should be legal");
365 
366  FIOp->ChangeToRegister(BaseReg, false);
367  OffsetOp->setImm(NewOffset);
368 }
369 
370 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
371  unsigned BaseReg,
372  int64_t Offset) const {
373  if (!SIInstrInfo::isMUBUF(*MI))
374  return false;
375 
376  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
377 
378  return isUInt<12>(NewOffset);
379 }
380 
381 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
382  const MachineFunction &MF, unsigned Kind) const {
383  // This is inaccurate. It depends on the instruction and address space. The
384  // only place where we should hit this is for dealing with frame indexes /
385  // private accesses, so this is correct in that case.
386  return &AMDGPU::VGPR_32RegClass;
387 }
388 
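// Map a spill pseudo opcode to the number of 32-bit sub-registers it covers.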
389 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
390 
391  switch (Op) {
392  case AMDGPU::SI_SPILL_S512_SAVE:
393  case AMDGPU::SI_SPILL_S512_RESTORE:
394  case AMDGPU::SI_SPILL_V512_SAVE:
395  case AMDGPU::SI_SPILL_V512_RESTORE:
396  return 16;
397  case AMDGPU::SI_SPILL_S256_SAVE:
398  case AMDGPU::SI_SPILL_S256_RESTORE:
399  case AMDGPU::SI_SPILL_V256_SAVE:
400  case AMDGPU::SI_SPILL_V256_RESTORE:
401  return 8;
402  case AMDGPU::SI_SPILL_S128_SAVE:
403  case AMDGPU::SI_SPILL_S128_RESTORE:
404  case AMDGPU::SI_SPILL_V128_SAVE:
405  case AMDGPU::SI_SPILL_V128_RESTORE:
406  return 4;
407  case AMDGPU::SI_SPILL_V96_SAVE:
408  case AMDGPU::SI_SPILL_V96_RESTORE:
409  return 3;
410  case AMDGPU::SI_SPILL_S64_SAVE:
411  case AMDGPU::SI_SPILL_S64_RESTORE:
412  case AMDGPU::SI_SPILL_V64_SAVE:
413  case AMDGPU::SI_SPILL_V64_RESTORE:
414  return 2;
415  case AMDGPU::SI_SPILL_S32_SAVE:
416  case AMDGPU::SI_SPILL_S32_RESTORE:
417  case AMDGPU::SI_SPILL_V32_SAVE:
418  case AMDGPU::SI_SPILL_V32_RESTORE:
419  return 1;
420  default: llvm_unreachable("Invalid spill opcode");
421  }
422 }
423 
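// Map an OFFEN MUBUF store opcode to its OFFSET form, or -1 if there is none.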
424 static int getOffsetMUBUFStore(unsigned Opc) {
425  switch (Opc) {
426  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
427  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
428  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
429  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
430  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
431  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
432  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
433  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
434  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
435  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
436  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
437  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
438  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
439  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
440  default:
441  return -1;
442  }
443 }
444 
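// Map an OFFEN MUBUF load opcode to its OFFSET form, or -1 if there is none.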
445 static int getOffsetMUBUFLoad(unsigned Opc) {
446  switch (Opc) {
447  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
448  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
449  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
450  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
451  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
452  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
453  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
454  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
455  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
456  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
457  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
458  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
459  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
460  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
461  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
462  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
463  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
464  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
465  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
466  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
467  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
468  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
469  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
470  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
471  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
472  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
473  default:
474  return -1;
475  }
476 }
477 
478 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
479 // need to handle the case where an SGPR may need to be spilled while spilling.
480 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
481  MachineFrameInfo &MFI,
482  MachineBasicBlock::iterator MI,
483  int Index,
484  int64_t Offset) {
485  MachineBasicBlock *MBB = MI->getParent();
486  const DebugLoc &DL = MI->getDebugLoc();
487  bool IsStore = MI->mayStore();
488 
489  unsigned Opc = MI->getOpcode();
490  int LoadStoreOp = IsStore ?
491  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
492  if (LoadStoreOp == -1)
493  return false;
494 
495  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
496  MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
497  .add(*Reg)
498  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
499  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
500  .addImm(Offset)
501  .addImm(0) // glc
502  .addImm(0) // slc
503  .addImm(0) // tfe
504  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
505 
506  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
507  AMDGPU::OpName::vdata_in);
508  if (VDataIn)
509  NewMI.add(*VDataIn);
510  return true;
511 }
512 
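// Lower a VGPR spill/restore pseudo into a sequence of per-dword MUBUF
// accesses, scavenging an SGPR for the combined offset (or temporarily adding
// it to the scratch wave offset register) when it does not fit in 12 bits.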
513 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
514  unsigned LoadStoreOp,
515  int Index,
516  unsigned ValueReg,
517  bool IsKill,
518  unsigned ScratchRsrcReg,
519  unsigned ScratchOffsetReg,
520  int64_t InstOffset,
521  MachineMemOperand *MMO,
522  RegScavenger *RS) const {
523  MachineBasicBlock *MBB = MI->getParent();
524  MachineFunction *MF = MI->getParent()->getParent();
525  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
526  const SIInstrInfo *TII = ST.getInstrInfo();
527  const MachineFrameInfo &MFI = MF->getFrameInfo();
528 
529  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
530  const DebugLoc &DL = MI->getDebugLoc();
531  bool IsStore = Desc.mayStore();
532 
533  bool RanOutOfSGPRs = false;
534  bool Scavenged = false;
535  unsigned SOffset = ScratchOffsetReg;
536 
537  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
538  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
539  unsigned Size = NumSubRegs * 4;
540  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
541  const int64_t OriginalImmOffset = Offset;
542 
543  unsigned Align = MFI.getObjectAlignment(Index);
544  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
545 
546  if (!isUInt<12>(Offset + Size)) {
547  SOffset = AMDGPU::NoRegister;
548 
549  // We don't have access to the register scavenger if this function is called
550  // during PEI::scavengeFrameVirtualRegs().
551  if (RS)
552  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
553 
554  if (SOffset == AMDGPU::NoRegister) {
555  // There are no free SGPRs, and we are in the process of spilling
556  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
557  // on SI/CI; on VI it is true until we implement spilling using scalar
558  // stores), we have no way to free up an SGPR. Our solution here is to
559  // add the offset directly to the ScratchOffset register, and then
560  // subtract the offset after the spill to return ScratchOffset to its
561  // original value.
562  RanOutOfSGPRs = true;
563  SOffset = ScratchOffsetReg;
564  } else {
565  Scavenged = true;
566  }
567 
568  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
569  .addReg(ScratchOffsetReg)
570  .addImm(Offset);
571 
572  Offset = 0;
573  }
574 
575  const unsigned EltSize = 4;
576 
577  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
578  unsigned SubReg = NumSubRegs == 1 ?
579  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
580 
581  unsigned SOffsetRegState = 0;
582  unsigned SrcDstRegState = getDefRegState(!IsStore);
583  if (i + 1 == e) {
584  SOffsetRegState |= getKillRegState(Scavenged);
585  // The last implicit use carries the "Kill" flag.
586  SrcDstRegState |= getKillRegState(IsKill);
587  }
588 
589  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
590  MachineMemOperand *NewMMO
591  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
592  EltSize, MinAlign(Align, EltSize * i));
593 
594  auto MIB = BuildMI(*MBB, MI, DL, Desc)
595  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
596  .addReg(ScratchRsrcReg)
597  .addReg(SOffset, SOffsetRegState)
598  .addImm(Offset)
599  .addImm(0) // glc
600  .addImm(0) // slc
601  .addImm(0) // tfe
602  .addMemOperand(NewMMO);
603 
604  if (NumSubRegs > 1)
605  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
606  }
607 
608  if (RanOutOfSGPRs) {
609  // Subtract the offset we added to the ScratchOffset register.
610  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
611  .addReg(ScratchOffsetReg)
612  .addImm(OriginalImmOffset);
613  }
614 }
615 
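// Pick the widest scalar buffer load/store that evenly divides the spilled
// register, returning {element size in bytes, opcode}.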
616 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
617  bool Store) {
618  if (SuperRegSize % 16 == 0) {
619  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
620  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
621  }
622 
623  if (SuperRegSize % 8 == 0) {
624  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
625  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
626  }
627 
628  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
629  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
630 }
631 
632 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
633  int Index,
634  RegScavenger *RS,
635  bool OnlyToVGPR) const {
636  MachineBasicBlock *MBB = MI->getParent();
637  MachineFunction *MF = MBB->getParent();
638  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
639 
640  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
641  = MFI->getSGPRToVGPRSpills(Index);
642  bool SpillToVGPR = !VGPRSpills.empty();
643  if (OnlyToVGPR && !SpillToVGPR)
644  return false;
645 
646  MachineRegisterInfo &MRI = MF->getRegInfo();
647  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
648  const SIInstrInfo *TII = ST.getInstrInfo();
649 
650  unsigned SuperReg = MI->getOperand(0).getReg();
651  bool IsKill = MI->getOperand(0).isKill();
652  const DebugLoc &DL = MI->getDebugLoc();
653 
654  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
655 
656  bool SpillToSMEM = spillSGPRToSMEM();
657  if (SpillToSMEM && OnlyToVGPR)
658  return false;
659 
660  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
661 
662  unsigned OffsetReg = AMDGPU::M0;
663  unsigned M0CopyReg = AMDGPU::NoRegister;
664 
665  if (SpillToSMEM) {
666  if (RS->isRegUsed(AMDGPU::M0)) {
667  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
668  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
669  .addReg(AMDGPU::M0);
670  }
671  }
672 
673  unsigned ScalarStoreOp;
674  unsigned EltSize = 4;
675  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
676  if (SpillToSMEM && isSGPRClass(RC)) {
677  // XXX - if private_element_size is larger than 4 it might be useful to be
678  // able to spill wider vmem spills.
679  std::tie(EltSize, ScalarStoreOp) =
680  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
681  }
682 
683  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
684  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
685 
686  // SubReg carries the "Kill" flag when SubReg == SuperReg.
687  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
688  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
689  unsigned SubReg = NumSubRegs == 1 ?
690  SuperReg : getSubReg(SuperReg, SplitParts[i]);
691 
692  if (SpillToSMEM) {
693  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
694 
695  // The allocated memory size is really the wavefront size * the frame
696  // index size. The widest register class is 64 bytes, so a 4-byte scratch
697  // allocation is enough to spill this in a single stack object.
698  //
699  // FIXME: Frame size/offsets are computed earlier than this, so the extra
700  // space is still unnecessarily allocated.
701 
702  unsigned Align = FrameInfo.getObjectAlignment(Index);
703  MachinePointerInfo PtrInfo
704  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
705  MachineMemOperand *MMO
706  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
707  EltSize, MinAlign(Align, EltSize * i));
708 
709  // SMEM instructions only support a single offset, so increment the wave
710  // offset.
711 
712  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
713  if (Offset != 0) {
714  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
715  .addReg(MFI->getFrameOffsetReg())
716  .addImm(Offset);
717  } else {
718  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
719  .addReg(MFI->getFrameOffsetReg());
720  }
721 
722  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
723  .addReg(SubReg, getKillRegState(IsKill)) // sdata
724  .addReg(MFI->getScratchRSrcReg()) // sbase
725  .addReg(OffsetReg, RegState::Kill) // soff
726  .addImm(0) // glc
727  .addMemOperand(MMO);
728 
729  continue;
730  }
731 
732  if (SpillToVGPR) {
733  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
734 
735  BuildMI(*MBB, MI, DL,
736  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
737  Spill.VGPR)
738  .addReg(SubReg, getKillRegState(IsKill))
739  .addImm(Spill.Lane);
740 
741  // FIXME: Since this spills to another register instead of an actual
742  // frame index, we should delete the frame index when all references to
743  // it are fixed.
744  } else {
745  // XXX - Can the to-VGPR spill fail for some subregisters but not others?
746  if (OnlyToVGPR)
747  return false;
748 
749  // Spill SGPR to a frame index.
750  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
751  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
753 
754  MachineInstrBuilder Mov
755  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
756  .addReg(SubReg, SubKillState);
757 
758 
759  // There could be undef components of a spilled super register.
760  // TODO: Can we detect this and skip the spill?
761  if (NumSubRegs > 1) {
762  // The last implicit use of the SuperReg carries the "Kill" flag.
763  unsigned SuperKillState = 0;
764  if (i + 1 == e)
765  SuperKillState |= getKillRegState(IsKill);
766  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
767  }
768 
769  unsigned Align = FrameInfo.getObjectAlignment(Index);
770  MachinePointerInfo PtrInfo
771  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
772  MachineMemOperand *MMO
773  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
774  EltSize, MinAlign(Align, EltSize * i));
775  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
776  .addReg(TmpReg, RegState::Kill) // src
777  .addFrameIndex(Index) // vaddr
778  .addReg(MFI->getScratchRSrcReg()) // srsrc
779  .addReg(MFI->getFrameOffsetReg()) // soffset
780  .addImm(i * 4) // offset
781  .addMemOperand(MMO);
782  }
783  }
784 
785  if (M0CopyReg != AMDGPU::NoRegister) {
786  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
787  .addReg(M0CopyReg, RegState::Kill);
788  }
789 
790  MI->eraseFromParent();
791  MFI->addToSpilledSGPRs(NumSubRegs);
792  return true;
793 }
794 
795 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
796  int Index,
797  RegScavenger *RS,
798  bool OnlyToVGPR) const {
799  MachineFunction *MF = MI->getParent()->getParent();
800  MachineRegisterInfo &MRI = MF->getRegInfo();
801  MachineBasicBlock *MBB = MI->getParent();
802  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
803 
804  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
805  = MFI->getSGPRToVGPRSpills(Index);
806  bool SpillToVGPR = !VGPRSpills.empty();
807  if (OnlyToVGPR && !SpillToVGPR)
808  return false;
809 
810  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
811  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
812  const SIInstrInfo *TII = ST.getInstrInfo();
813  const DebugLoc &DL = MI->getDebugLoc();
814 
815  unsigned SuperReg = MI->getOperand(0).getReg();
816  bool SpillToSMEM = spillSGPRToSMEM();
817  if (SpillToSMEM && OnlyToVGPR)
818  return false;
819 
820  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
821 
822  unsigned OffsetReg = AMDGPU::M0;
823  unsigned M0CopyReg = AMDGPU::NoRegister;
824 
825  if (SpillToSMEM) {
826  if (RS->isRegUsed(AMDGPU::M0)) {
827  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
828  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
829  .addReg(AMDGPU::M0);
830  }
831  }
832 
833  unsigned EltSize = 4;
834  unsigned ScalarLoadOp;
835 
836  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
837  if (SpillToSMEM && isSGPRClass(RC)) {
838  // XXX - if private_element_size is larger than 4 it might be useful to be
839  // able to spill wider vmem spills.
840  std::tie(EltSize, ScalarLoadOp) =
841  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
842  }
843 
844  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
845  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
846 
847  // SubReg carries the "Kill" flag when SubReg == SuperReg.
848  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
849 
850  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
851  unsigned SubReg = NumSubRegs == 1 ?
852  SuperReg : getSubReg(SuperReg, SplitParts[i]);
853 
854  if (SpillToSMEM) {
855  // FIXME: Size may be > 4 but extra bytes wasted.
856  unsigned Align = FrameInfo.getObjectAlignment(Index);
857  MachinePointerInfo PtrInfo
858  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
859  MachineMemOperand *MMO
860  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
861  EltSize, MinAlign(Align, EltSize * i));
862 
863  // Add i * 4 offset
864  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
865  if (Offset != 0) {
866  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
867  .addReg(MFI->getFrameOffsetReg())
868  .addImm(Offset);
869  } else {
870  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
871  .addReg(MFI->getFrameOffsetReg());
872  }
873 
874  auto MIB =
875  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
876  .addReg(MFI->getScratchRSrcReg()) // sbase
877  .addReg(OffsetReg, RegState::Kill) // soff
878  .addImm(0) // glc
879  .addMemOperand(MMO);
880 
881  if (NumSubRegs > 1)
882  MIB.addReg(SuperReg, RegState::ImplicitDefine);
883 
884  continue;
885  }
886 
887  if (SpillToVGPR) {
888  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
889  auto MIB =
890  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
891  SubReg)
892  .addReg(Spill.VGPR)
893  .addImm(Spill.Lane);
894 
895  if (NumSubRegs > 1)
896  MIB.addReg(SuperReg, RegState::ImplicitDefine);
897  } else {
898  if (OnlyToVGPR)
899  return false;
900 
901  // Restore SGPR from a stack slot.
902  // FIXME: We should use S_LOAD_DWORD here for VI.
903  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
904  unsigned Align = FrameInfo.getObjectAlignment(Index);
905 
906  MachinePointerInfo PtrInfo
907  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
908 
909  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
910  MachineMemOperand::MOLoad, EltSize,
911  MinAlign(Align, EltSize * i));
912 
913  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
914  .addFrameIndex(Index) // vaddr
915  .addReg(MFI->getScratchRSrcReg()) // srsrc
916  .addReg(MFI->getFrameOffsetReg()) // soffset
917  .addImm(i * 4) // offset
918  .addMemOperand(MMO);
919 
920  auto MIB =
921  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
922  .addReg(TmpReg, RegState::Kill);
923 
924  if (NumSubRegs > 1)
925  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
926  }
927  }
928 
929  if (M0CopyReg != AMDGPU::NoRegister) {
930  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
931  .addReg(M0CopyReg, RegState::Kill);
932  }
933 
934  MI->eraseFromParent();
935  return true;
936 }
937 
938 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
939 /// a VGPR and the stack slot can be safely eliminated when all other users are
940 /// handled.
941 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
942  MachineBasicBlock::iterator MI,
943  int FI,
944  RegScavenger *RS) const {
945  switch (MI->getOpcode()) {
946  case AMDGPU::SI_SPILL_S512_SAVE:
947  case AMDGPU::SI_SPILL_S256_SAVE:
948  case AMDGPU::SI_SPILL_S128_SAVE:
949  case AMDGPU::SI_SPILL_S64_SAVE:
950  case AMDGPU::SI_SPILL_S32_SAVE:
951  return spillSGPR(MI, FI, RS, true);
952  case AMDGPU::SI_SPILL_S512_RESTORE:
953  case AMDGPU::SI_SPILL_S256_RESTORE:
954  case AMDGPU::SI_SPILL_S128_RESTORE:
955  case AMDGPU::SI_SPILL_S64_RESTORE:
956  case AMDGPU::SI_SPILL_S32_RESTORE:
957  return restoreSGPR(MI, FI, RS, true);
958  default:
959  llvm_unreachable("not an SGPR spill instruction");
960  }
961 }
962 
963 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
964  int SPAdj, unsigned FIOperandNum,
965  RegScavenger *RS) const {
966  MachineFunction *MF = MI->getParent()->getParent();
967  MachineRegisterInfo &MRI = MF->getRegInfo();
968  MachineBasicBlock *MBB = MI->getParent();
969  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
970  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
971  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
972  const SIInstrInfo *TII = ST.getInstrInfo();
973  DebugLoc DL = MI->getDebugLoc();
974 
975  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
976  int Index = MI->getOperand(FIOperandNum).getIndex();
977 
978  switch (MI->getOpcode()) {
979  // SGPR register spill
980  case AMDGPU::SI_SPILL_S512_SAVE:
981  case AMDGPU::SI_SPILL_S256_SAVE:
982  case AMDGPU::SI_SPILL_S128_SAVE:
983  case AMDGPU::SI_SPILL_S64_SAVE:
984  case AMDGPU::SI_SPILL_S32_SAVE: {
985  spillSGPR(MI, Index, RS);
986  break;
987  }
988 
989  // SGPR register restore
990  case AMDGPU::SI_SPILL_S512_RESTORE:
991  case AMDGPU::SI_SPILL_S256_RESTORE:
992  case AMDGPU::SI_SPILL_S128_RESTORE:
993  case AMDGPU::SI_SPILL_S64_RESTORE:
994  case AMDGPU::SI_SPILL_S32_RESTORE: {
995  restoreSGPR(MI, Index, RS);
996  break;
997  }
998 
999  // VGPR register spill
1000  case AMDGPU::SI_SPILL_V512_SAVE:
1001  case AMDGPU::SI_SPILL_V256_SAVE:
1002  case AMDGPU::SI_SPILL_V128_SAVE:
1003  case AMDGPU::SI_SPILL_V96_SAVE:
1004  case AMDGPU::SI_SPILL_V64_SAVE:
1005  case AMDGPU::SI_SPILL_V32_SAVE: {
1006  const MachineOperand *VData = TII->getNamedOperand(*MI,
1007  AMDGPU::OpName::vdata);
1008  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1009  Index,
1010  VData->getReg(), VData->isKill(),
1011  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1012  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1013  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1014  *MI->memoperands_begin(),
1015  RS);
1016  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1017  MI->eraseFromParent();
1018  break;
1019  }
1020  case AMDGPU::SI_SPILL_V32_RESTORE:
1021  case AMDGPU::SI_SPILL_V64_RESTORE:
1022  case AMDGPU::SI_SPILL_V96_RESTORE:
1023  case AMDGPU::SI_SPILL_V128_RESTORE:
1024  case AMDGPU::SI_SPILL_V256_RESTORE:
1025  case AMDGPU::SI_SPILL_V512_RESTORE: {
1026  const MachineOperand *VData = TII->getNamedOperand(*MI,
1027  AMDGPU::OpName::vdata);
1028 
1029  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1030  Index,
1031  VData->getReg(), VData->isKill(),
1032  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1033  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1034  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1035  *MI->memoperands_begin(),
1036  RS);
1037  MI->eraseFromParent();
1038  break;
1039  }
1040 
1041  default: {
1042  const DebugLoc &DL = MI->getDebugLoc();
1043  bool IsMUBUF = TII->isMUBUF(*MI);
1044 
1045  if (!IsMUBUF &&
1046  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1047  // Convert to an absolute stack address by finding the offset from the
1048  // scratch wave base and scaling by the wave size.
1049  //
1050  // In an entry function/kernel the stack address is already the
1051  // absolute address relative to the scratch wave offset.
1052 
1053  unsigned DiffReg
1054  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1055 
1056  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1057  unsigned ResultReg = IsCopy ?
1058  MI->getOperand(0).getReg() :
1059  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1060 
1061  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1062  .addReg(MFI->getFrameOffsetReg())
1063  .addReg(MFI->getScratchWaveOffsetReg());
1064 
1065  int64_t Offset = FrameInfo.getObjectOffset(Index);
1066  if (Offset == 0) {
1067  // XXX - This never happens because of emergency scavenging slot at 0?
1068  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1069  .addImm(Log2_32(ST.getWavefrontSize()))
1070  .addReg(DiffReg);
1071  } else {
1072  unsigned ScaledReg
1073  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1074 
1075  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1076  .addImm(Log2_32(ST.getWavefrontSize()))
1077  .addReg(DiffReg, RegState::Kill);
1078 
1079  // TODO: Fold if use instruction is another add of a constant.
1080  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1081  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1082  .addImm(Offset)
1083  .addReg(ScaledReg, RegState::Kill);
1084  } else {
1085  unsigned ConstOffsetReg
1086  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1087 
1088  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1089  .addImm(Offset);
1090  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1091  .addReg(ConstOffsetReg, RegState::Kill)
1092  .addReg(ScaledReg, RegState::Kill);
1093  }
1094  }
1095 
1096  // Don't introduce an extra copy if we're just materializing in a mov.
1097  if (IsCopy)
1098  MI->eraseFromParent();
1099  else
1100  FIOp.ChangeToRegister(ResultReg, false, false, true);
1101  return;
1102  }
1103 
1104  if (IsMUBUF) {
1105  // Disable offen so we don't need a 0 vgpr base.
1106  assert(static_cast<int>(FIOperandNum) ==
1107  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1108  AMDGPU::OpName::vaddr));
1109 
1110  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1111  == MFI->getFrameOffsetReg());
1112 
1113  int64_t Offset = FrameInfo.getObjectOffset(Index);
1114  int64_t OldImm
1115  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1116  int64_t NewOffset = OldImm + Offset;
1117 
1118  if (isUInt<12>(NewOffset) &&
1119  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1120  MI->eraseFromParent();
1121  return;
1122  }
1123  }
1124 
1125  // If the offset is simply too big, don't convert to a scratch wave offset
1126  // relative index.
1127 
1128  int64_t Offset = FrameInfo.getObjectOffset(Index);
1129  FIOp.ChangeToImmediate(Offset);
1130  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1131  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1132  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1133  .addImm(Offset);
1134  FIOp.ChangeToRegister(TmpReg, false, false, true);
1135  }
1136  }
1137  }
1138 }
1139 
1140 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1141  #define AMDGPU_REG_ASM_NAMES
1142  #include "AMDGPURegAsmNames.inc.cpp"
1143 
1144  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1145  if (Reg >= BeginReg && Reg <= EndReg) { \
1146  unsigned Index = Reg - BeginReg; \
1147  assert(Index < array_lengthof(RegTable)); \
1148  return RegTable[Index]; \
1149  }
1150 
1151  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1152  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1153  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1154  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1155  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1156  VGPR96RegNames);
1157 
1158  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1159  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1160  VGPR128RegNames);
1161  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1162  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1163  SGPR128RegNames);
1164 
1165  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1166  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1167  VGPR256RegNames);
1168 
1169  REG_RANGE(
1170  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1171  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1172  VGPR512RegNames);
1173 
1174  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1175  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1176  SGPR256RegNames);
1177 
1178  REG_RANGE(
1179  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1180  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1181  SGPR512RegNames
1182  );
1183 
1184 #undef REG_RANGE
1185 
1186  // FIXME: Rename flat_scr so we don't need to special case this.
1187  switch (Reg) {
1188  case AMDGPU::FLAT_SCR:
1189  return "flat_scratch";
1190  case AMDGPU::FLAT_SCR_LO:
1191  return "flat_scratch_lo";
1192  case AMDGPU::FLAT_SCR_HI:
1193  return "flat_scratch_hi";
1194  default:
1195  // For the special named registers the default is fine.
1196  return AMDGPURegisterInfo::getRegAsmName(Reg);
1197  }
1198 }
1199 
1200 // FIXME: This is very slow. It might be worth creating a map from physreg to
1201 // register class.
1202 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1203  assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1204 
1205  static const TargetRegisterClass *const BaseClasses[] = {
1206  &AMDGPU::VGPR_32RegClass,
1207  &AMDGPU::SReg_32RegClass,
1208  &AMDGPU::VReg_64RegClass,
1209  &AMDGPU::SReg_64RegClass,
1210  &AMDGPU::VReg_96RegClass,
1211  &AMDGPU::VReg_128RegClass,
1212  &AMDGPU::SReg_128RegClass,
1213  &AMDGPU::VReg_256RegClass,
1214  &AMDGPU::SReg_256RegClass,
1215  &AMDGPU::VReg_512RegClass,
1216  &AMDGPU::SReg_512RegClass,
1217  &AMDGPU::SCC_CLASSRegClass,
1218  };
1219 
1220  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1221  if (BaseClass->contains(Reg)) {
1222  return BaseClass;
1223  }
1224  }
1225  return nullptr;
1226 }
1227 
1228 // TODO: It might be helpful to have some target specific flags in
1229 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1230 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1231  unsigned Size = getRegSizeInBits(*RC);
1232  if (Size < 32)
1233  return false;
1234  switch (Size) {
1235  case 32:
1236  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1237  case 64:
1238  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1239  case 96:
1240  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1241  case 128:
1242  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1243  case 256:
1244  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1245  case 512:
1246  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1247  default:
1248  llvm_unreachable("Invalid register class size");
1249  }
1250 }
1251 
1252 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1253  const TargetRegisterClass *SRC) const {
1254  switch (getRegSizeInBits(*SRC)) {
1255  case 32:
1256  return &AMDGPU::VGPR_32RegClass;
1257  case 64:
1258  return &AMDGPU::VReg_64RegClass;
1259  case 96:
1260  return &AMDGPU::VReg_96RegClass;
1261  case 128:
1262  return &AMDGPU::VReg_128RegClass;
1263  case 256:
1264  return &AMDGPU::VReg_256RegClass;
1265  case 512:
1266  return &AMDGPU::VReg_512RegClass;
1267  default:
1268  llvm_unreachable("Invalid register class size");
1269  }
1270 }
1271 
1272 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1273  const TargetRegisterClass *VRC) const {
1274  switch (getRegSizeInBits(*VRC)) {
1275  case 32:
1276  return &AMDGPU::SGPR_32RegClass;
1277  case 64:
1278  return &AMDGPU::SReg_64RegClass;
1279  case 128:
1280  return &AMDGPU::SReg_128RegClass;
1281  case 256:
1282  return &AMDGPU::SReg_256RegClass;
1283  case 512:
1284  return &AMDGPU::SReg_512RegClass;
1285  default:
1286  llvm_unreachable("Invalid register class size");
1287  }
1288 }
1289 
1290 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1291  const TargetRegisterClass *RC, unsigned SubIdx) const {
1292  if (SubIdx == AMDGPU::NoSubRegister)
1293  return RC;
1294 
1295  // We can assume that each lane corresponds to one 32-bit register.
1296  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1297  if (isSGPRClass(RC)) {
1298  switch (Count) {
1299  case 1:
1300  return &AMDGPU::SGPR_32RegClass;
1301  case 2:
1302  return &AMDGPU::SReg_64RegClass;
1303  case 4:
1304  return &AMDGPU::SReg_128RegClass;
1305  case 8:
1306  return &AMDGPU::SReg_256RegClass;
1307  case 16: /* fall-through */
1308  default:
1309  llvm_unreachable("Invalid sub-register class size");
1310  }
1311  } else {
1312  switch (Count) {
1313  case 1:
1314  return &AMDGPU::VGPR_32RegClass;
1315  case 2:
1316  return &AMDGPU::VReg_64RegClass;
1317  case 3:
1318  return &AMDGPU::VReg_96RegClass;
1319  case 4:
1320  return &AMDGPU::VReg_128RegClass;
1321  case 8:
1322  return &AMDGPU::VReg_256RegClass;
1323  case 16: /* fall-through */
1324  default:
1325  llvm_unreachable("Invalid sub-register class size");
1326  }
1327  }
1328 }
1329 
1330 bool SIRegisterInfo::shouldRewriteCopySrc(
1331  const TargetRegisterClass *DefRC,
1332  unsigned DefSubReg,
1333  const TargetRegisterClass *SrcRC,
1334  unsigned SrcSubReg) const {
1335  // We want to prefer the smallest register class possible, so we don't want to
1336  // stop and rewrite on anything that looks like a subregister
1337  // extract. Operations mostly don't care about the super register class, so we
1338  // only want to stop on the most basic of copies between the same register
1339  // class.
1340  //
1341  // e.g. if we have something like
1342  // %0 = ...
1343  // %1 = ...
1344  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1345  // %3 = COPY %2, sub0
1346  //
1347  // We want to look through the COPY to find:
1348  // => %3 = COPY %0
1349 
1350  // Plain copy.
1351  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1352 }
1353 
1354 /// \brief Returns a register that is not used at any point in the function.
1355 /// If all registers are used, then this function will return
1356 /// AMDGPU::NoRegister.
1357 unsigned
1358 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1359  const TargetRegisterClass *RC,
1360  const MachineFunction &MF) const {
1361 
1362  for (unsigned Reg : *RC)
1363  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1364  return Reg;
1365  return AMDGPU::NoRegister;
1366 }
1367 
1368 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1369  unsigned EltSize) const {
1370  if (EltSize == 4) {
1371  static const int16_t Sub0_15[] = {
1372  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1373  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1374  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1375  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1376  };
1377 
1378  static const int16_t Sub0_7[] = {
1379  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1380  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1381  };
1382 
1383  static const int16_t Sub0_3[] = {
1384  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1385  };
1386 
1387  static const int16_t Sub0_2[] = {
1388  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1389  };
1390 
1391  static const int16_t Sub0_1[] = {
1392  AMDGPU::sub0, AMDGPU::sub1,
1393  };
1394 
1395  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1396  case 32:
1397  return {};
1398  case 64:
1399  return makeArrayRef(Sub0_1);
1400  case 96:
1401  return makeArrayRef(Sub0_2);
1402  case 128:
1403  return makeArrayRef(Sub0_3);
1404  case 256:
1405  return makeArrayRef(Sub0_7);
1406  case 512:
1407  return makeArrayRef(Sub0_15);
1408  default:
1409  llvm_unreachable("unhandled register size");
1410  }
1411  }
1412 
1413  if (EltSize == 8) {
1414  static const int16_t Sub0_15_64[] = {
1415  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1416  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1417  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1418  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1419  };
1420 
1421  static const int16_t Sub0_7_64[] = {
1422  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1423  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1424  };
1425 
1426 
1427  static const int16_t Sub0_3_64[] = {
1428  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1429  };
1430 
1431  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1432  case 64:
1433  return {};
1434  case 128:
1435  return makeArrayRef(Sub0_3_64);
1436  case 256:
1437  return makeArrayRef(Sub0_7_64);
1438  case 512:
1439  return makeArrayRef(Sub0_15_64);
1440  default:
1441  llvm_unreachable("unhandled register size");
1442  }
1443  }
1444 
1445  assert(EltSize == 16 && "unhandled register spill split size");
1446 
1447  static const int16_t Sub0_15_128[] = {
1448  AMDGPU::sub0_sub1_sub2_sub3,
1449  AMDGPU::sub4_sub5_sub6_sub7,
1450  AMDGPU::sub8_sub9_sub10_sub11,
1451  AMDGPU::sub12_sub13_sub14_sub15
1452  };
1453 
1454  static const int16_t Sub0_7_128[] = {
1455  AMDGPU::sub0_sub1_sub2_sub3,
1456  AMDGPU::sub4_sub5_sub6_sub7
1457  };
1458 
1459  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1460  case 128:
1461  return {};
1462  case 256:
1463  return makeArrayRef(Sub0_7_128);
1464  case 512:
1465  return makeArrayRef(Sub0_15_128);
1466  default:
1467  llvm_unreachable("unhandled register size");
1468  }
1469 }
1470 
1471 const TargetRegisterClass*
1472 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1473  unsigned Reg) const {
1474  if (TargetRegisterInfo::isVirtualRegister(Reg))
1475  return MRI.getRegClass(Reg);
1476 
1477  return getPhysRegClass(Reg);
1478 }
1479 
1480 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1481  unsigned Reg) const {
1482  return hasVGPRs(getRegClassForReg(MRI, Reg));
1483 }
1484 
1485 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1486  const TargetRegisterClass *SrcRC,
1487  unsigned SubReg,
1488  const TargetRegisterClass *DstRC,
1489  unsigned DstSubReg,
1490  const TargetRegisterClass *NewRC,
1491  LiveIntervals &LIS) const {
1492  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1493  unsigned DstSize = getRegSizeInBits(*DstRC);
1494  unsigned NewSize = getRegSizeInBits(*NewRC);
1495 
1496  // Do not increase size of registers beyond dword, we would need to allocate
1497  // adjacent registers and constraint regalloc more than needed.
1498 
1499  // Always allow dword coalescing.
1500  if (SrcSize <= 32 || DstSize <= 32)
1501  return true;
1502 
1503  return NewSize <= DstSize || NewSize <= SrcSize;
1504 }
1505 
1506 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1507  MachineFunction &MF) const {
1508 
1509  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1510  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1511 
1512  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1513  MF.getFunction());
1514  switch (RC->getID()) {
1515  default:
1516  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1517  case AMDGPU::VGPR_32RegClassID:
1518  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1519  case AMDGPU::SGPR_32RegClassID:
1520  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1521  }
1522 }
1523 
1524 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1525  unsigned Idx) const {
1526  if (Idx == getVGPRPressureSet())
1527  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1528  const_cast<MachineFunction &>(MF));
1529 
1530  if (Idx == getSGPRPressureSet())
1531  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1532  const_cast<MachineFunction &>(MF));
1533 
1534  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1535 }
1536 
1537 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1538  static const int Empty[] = { -1 };
1539 
1540  if (hasRegUnit(AMDGPU::M0, RegUnit))
1541  return Empty;
1542  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1543 }