SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/RegisterScavenging.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/LLVMContext.h"
24 
25 using namespace llvm;
26 
27 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
28  for (unsigned i = 0; PSets[i] != -1; ++i) {
29  if (PSets[i] == (int)PSetID)
30  return true;
31  }
32  return false;
33 }
34 
35 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
36  BitVector &PressureSets) const {
37  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
38  const int *PSets = getRegUnitPressureSets(*U);
39  if (hasPressureSet(PSets, PSetID)) {
40  PressureSets.set(PSetID);
41  break;
42  }
43  }
44 }
45 
46 static cl::opt<bool> EnableSpillSGPRToSMEM(
47  "amdgpu-spill-sgpr-to-smem",
48  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
49  cl::init(false));
50 
51 static cl::opt<bool> EnableSpillSGPRToVGPR(
52  "amdgpu-spill-sgpr-to-vgpr",
53  cl::desc("Enable spilling SGPRs to VGPRs"),
54  cl::ReallyHidden,
55  cl::init(true));
56 
57 SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
58   AMDGPURegisterInfo(),
59  SGPRPressureSets(getNumRegPressureSets()),
60  VGPRPressureSets(getNumRegPressureSets()),
61  SpillSGPRToVGPR(false),
62  SpillSGPRToSMEM(false) {
63  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
64  SpillSGPRToSMEM = true;
65  else if (EnableSpillSGPRToVGPR)
66  SpillSGPRToVGPR = true;
67 
68  unsigned NumRegPressureSets = getNumRegPressureSets();
69 
70  SGPRSetID = NumRegPressureSets;
71  VGPRSetID = NumRegPressureSets;
72 
73  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
74  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
75  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
76  }
77 
78  // Determine the number of reg units for each pressure set.
79  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
80  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
81  const int *PSets = getRegUnitPressureSets(i);
82  for (unsigned j = 0; PSets[j] != -1; ++j) {
83  ++PressureSetRegUnits[PSets[j]];
84  }
85  }
86 
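 // Note (derived from the code below): among the pressure sets that contain
 // SGPR0 (resp. VGPR0), the loop records the ID of the one spanning the most
 // register units as SGPRSetID (resp. VGPRSetID), presumably so the broadest
 // set stands in for total SGPR/VGPR pressure in getRegPressureSetLimit().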
87  unsigned VGPRMax = 0, SGPRMax = 0;
88  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
89  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
90  VGPRSetID = i;
91  VGPRMax = PressureSetRegUnits[i];
92  continue;
93  }
94  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
95  SGPRSetID = i;
96  SGPRMax = PressureSetRegUnits[i];
97  }
98  }
99 
100  assert(SGPRSetID < NumRegPressureSets &&
101  VGPRSetID < NumRegPressureSets);
102 }
103 
104 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
105  MCRegAliasIterator R(Reg, this, true);
106 
107  for (; R.isValid(); ++R)
108  Reserved.set(*R);
109 }
110 
111 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
112  const MachineFunction &MF) const {
113 
114  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
115  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
116  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
117  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
118 }
119 
120 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
121  unsigned Reg;
122 
123  // Try to place it in a hole after PrivateSegmentBufferReg.
124  if (RegCount & 3) {
125  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
126  // alignment constraints, so we have a hole where we can put the wave offset.
127  Reg = RegCount - 1;
128  } else {
129  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
130  // wave offset before it.
131  Reg = RegCount - 5;
132  }
133 
134  return Reg;
135 }
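 // Worked example of the helper above: with RegCount == 102, 102 & 3 != 0, so
 // the wave byte offset goes in SGPR101; the 4-aligned segment buffer then
 // occupies SGPR96..SGPR99 (alignDown(102, 4) - 4 == 96), leaving SGPR100/101
 // as the hole. With RegCount == 96 the buffer sits at SGPR92..SGPR95 and the
 // wave offset is placed just below it, at SGPR91 (96 - 5).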
136 
137 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
138  const MachineFunction &MF) const {
139  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
140  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
141  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
142 }
143 
144 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
145  const MachineFunction &MF) const {
146  return AMDGPU::SGPR32;
147 }
148 
149 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
150  BitVector Reserved(getNumRegs());
151 
152  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
153  // this seems likely to result in bugs, so I'm marking them as reserved.
154  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
155  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
156 
157  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
158  reserveRegisterTuples(Reserved, AMDGPU::M0);
159 
160  // Reserve the memory aperture registers.
161  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
162  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
163  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
164  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
165 
166  // Reserve Trap Handler registers - support is not implemented in Codegen.
167  reserveRegisterTuples(Reserved, AMDGPU::TBA);
168  reserveRegisterTuples(Reserved, AMDGPU::TMA);
169  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
170  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
171  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
172  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
173  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
174  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
175 
176  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
177 
178  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
179  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
180  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
181  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
182  reserveRegisterTuples(Reserved, Reg);
183  }
184 
185  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
186  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
187  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
188  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
189  reserveRegisterTuples(Reserved, Reg);
190  }
191 
192  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
193 
194  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
195  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
196  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
197  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
198  }
199 
200  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
201  if (ScratchRSrcReg != AMDGPU::NoRegister) {
202  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
203  // to spill.
204  // TODO: May need to reserve a VGPR if doing LDS spilling.
205  reserveRegisterTuples(Reserved, ScratchRSrcReg);
206  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
207  }
208 
209  // We have to assume the SP is needed in case there are calls in the function,
210  // which is detected after the function is lowered. If we aren't really going
211  // to need SP, don't bother reserving it.
212  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
213 
214  if (StackPtrReg != AMDGPU::NoRegister) {
215  reserveRegisterTuples(Reserved, StackPtrReg);
216  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
217  }
218 
219  unsigned FrameReg = MFI->getFrameOffsetReg();
220  if (FrameReg != AMDGPU::NoRegister) {
221  reserveRegisterTuples(Reserved, FrameReg);
222  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
223  }
224 
225  return Reserved;
226 }
227 
228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
229  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
230  if (Info->isEntryFunction()) {
231  const MachineFrameInfo &MFI = Fn.getFrameInfo();
232  return MFI.hasStackObjects() || MFI.hasCalls();
233  }
234 
235  // May need scavenger for dealing with callee saved registers.
236  return true;
237 }
238 
239 bool SIRegisterInfo::requiresFrameIndexScavenging(
240  const MachineFunction &MF) const {
241  const MachineFrameInfo &MFI = MF.getFrameInfo();
242  if (MFI.hasStackObjects())
243  return true;
244 
245  // May need to deal with callee saved registers.
246  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
247  return !Info->isEntryFunction();
248 }
249 
250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
251  const MachineFunction &MF) const {
252  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
253  // create a virtual register for it during frame index elimination, so the
254  // scavenger is directly needed.
255  return MF.getFrameInfo().hasStackObjects() &&
256  MF.getSubtarget<SISubtarget>().hasScalarStores() &&
257  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
258 }
259 
260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
261  const MachineFunction &) const {
262  // There are no special dedicated stack or frame pointers.
263  return true;
264 }
265 
266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
267  // This helps catch bugs as verifier errors.
268  return true;
269 }
270 
271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
272  assert(SIInstrInfo::isMUBUF(*MI));
273 
274  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
275  AMDGPU::OpName::offset);
276  return MI->getOperand(OffIdx).getImm();
277 }
278 
279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
280  int Idx) const {
281  if (!SIInstrInfo::isMUBUF(*MI))
282  return 0;
283 
284  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
285  AMDGPU::OpName::vaddr) &&
286  "Should never see frame index on non-address operand");
287 
288  return getMUBUFInstrOffset(MI);
289 }
290 
291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
292  if (!MI->mayLoadOrStore())
293  return false;
294 
295  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
296 
297  return !isUInt<12>(FullOffset);
298 }
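 // The isUInt<12> checks in this file reflect the 12-bit unsigned immediate
 // offset field of MUBUF instructions: only offsets in the range 0..4095 can
 // be folded into the instruction; anything larger needs a base register.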
299 
300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
301  unsigned BaseReg,
302  int FrameIdx,
303  int64_t Offset) const {
304  MachineBasicBlock::iterator Ins = MBB->begin();
305  DebugLoc DL; // Defaults to "unknown"
306 
307  if (Ins != MBB->end())
308  DL = Ins->getDebugLoc();
309 
310  MachineFunction *MF = MBB->getParent();
311  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
312  const SIInstrInfo *TII = Subtarget.getInstrInfo();
313 
314  if (Offset == 0) {
315  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
316  .addFrameIndex(FrameIdx);
317  return;
318  }
319 
320  MachineRegisterInfo &MRI = MF->getRegInfo();
321  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
322 
323  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
324 
325  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
326  .addImm(Offset);
327  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
328  .addFrameIndex(FrameIdx);
329 
330  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
331  .addReg(OffsetReg, RegState::Kill)
332  .addReg(FIReg);
333 }
334 
335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
336  int64_t Offset) const {
337 
338  MachineBasicBlock *MBB = MI.getParent();
339  MachineFunction *MF = MBB->getParent();
340  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
341  const SIInstrInfo *TII = Subtarget.getInstrInfo();
342 
343 #ifndef NDEBUG
344  // FIXME: Is it possible to be storing a frame index to itself?
345  bool SeenFI = false;
346  for (const MachineOperand &MO: MI.operands()) {
347  if (MO.isFI()) {
348  if (SeenFI)
349  llvm_unreachable("should not see multiple frame indices");
350 
351  SeenFI = true;
352  }
353  }
354 #endif
355 
356  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
357  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
358  assert(TII->isMUBUF(MI));
359  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
360  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
361  "should only be seeing frame offset relative FrameIndex");
362 
363 
364  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
365  int64_t NewOffset = OffsetOp->getImm() + Offset;
366  assert(isUInt<12>(NewOffset) && "offset should be legal");
367 
368  FIOp->ChangeToRegister(BaseReg, false);
369  OffsetOp->setImm(NewOffset);
370 }
371 
372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
373  unsigned BaseReg,
374  int64_t Offset) const {
375  if (!SIInstrInfo::isMUBUF(*MI))
376  return false;
377 
378  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
379 
380  return isUInt<12>(NewOffset);
381 }
382 
383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
384  const MachineFunction &MF, unsigned Kind) const {
385  // This is inaccurate. It depends on the instruction and address space. The
386  // only place where we should hit this is for dealing with frame indexes /
387  // private accesses, so this is correct in that case.
388  return &AMDGPU::VGPR_32RegClass;
389 }
390 
391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
392 
393  switch (Op) {
394  case AMDGPU::SI_SPILL_S512_SAVE:
395  case AMDGPU::SI_SPILL_S512_RESTORE:
396  case AMDGPU::SI_SPILL_V512_SAVE:
397  case AMDGPU::SI_SPILL_V512_RESTORE:
398  return 16;
399  case AMDGPU::SI_SPILL_S256_SAVE:
400  case AMDGPU::SI_SPILL_S256_RESTORE:
401  case AMDGPU::SI_SPILL_V256_SAVE:
402  case AMDGPU::SI_SPILL_V256_RESTORE:
403  return 8;
404  case AMDGPU::SI_SPILL_S128_SAVE:
405  case AMDGPU::SI_SPILL_S128_RESTORE:
406  case AMDGPU::SI_SPILL_V128_SAVE:
407  case AMDGPU::SI_SPILL_V128_RESTORE:
408  return 4;
409  case AMDGPU::SI_SPILL_V96_SAVE:
410  case AMDGPU::SI_SPILL_V96_RESTORE:
411  return 3;
412  case AMDGPU::SI_SPILL_S64_SAVE:
413  case AMDGPU::SI_SPILL_S64_RESTORE:
414  case AMDGPU::SI_SPILL_V64_SAVE:
415  case AMDGPU::SI_SPILL_V64_RESTORE:
416  return 2;
417  case AMDGPU::SI_SPILL_S32_SAVE:
418  case AMDGPU::SI_SPILL_S32_RESTORE:
419  case AMDGPU::SI_SPILL_V32_SAVE:
420  case AMDGPU::SI_SPILL_V32_RESTORE:
421  return 1;
422  default: llvm_unreachable("Invalid spill opcode");
423  }
424 }
425 
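 // The two helpers below map the OFFEN (VGPR-addressed) MUBUF store/load
 // opcodes to their OFFSET (immediate-only) counterparts, returning -1 when no
 // such form exists; buildMUBUFOffsetLoadStore uses them to drop the VGPR
 // address operand once the frame offset is known to fit in the immediate.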
426 static int getOffsetMUBUFStore(unsigned Opc) {
427  switch (Opc) {
428  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
429  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
430  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
431  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
432  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
433  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
434  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
435  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
436  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
437  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
438  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
439  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
440  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
441  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
442  default:
443  return -1;
444  }
445 }
446 
447 static int getOffsetMUBUFLoad(unsigned Opc) {
448  switch (Opc) {
449  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
450  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
451  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
452  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
453  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
454  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
455  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
456  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
457  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
458  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
459  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
460  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
461  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
462  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
463  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
464  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
465  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
466  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
467  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
468  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
469  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
470  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
471  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
472  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
473  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
474  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
475  default:
476  return -1;
477  }
478 }
479 
480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
481 // need to handle the case where an SGPR may need to be spilled while spilling.
482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
483  MachineFrameInfo &MFI,
484  MachineBasicBlock::iterator MI,
485  int Index,
486  int64_t Offset) {
487  MachineBasicBlock *MBB = MI->getParent();
488  const DebugLoc &DL = MI->getDebugLoc();
489  bool IsStore = MI->mayStore();
490 
491  unsigned Opc = MI->getOpcode();
492  int LoadStoreOp = IsStore ?
493  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
494  if (LoadStoreOp == -1)
495  return false;
496 
497  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
498  MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
499  .add(*Reg)
500  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
501  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
502  .addImm(Offset)
503  .addImm(0) // glc
504  .addImm(0) // slc
505  .addImm(0) // tfe
506  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
507 
508  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
509  AMDGPU::OpName::vdata_in);
510  if (VDataIn)
511  NewMI.add(*VDataIn);
512  return true;
513 }
514 
515 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
516  unsigned LoadStoreOp,
517  int Index,
518  unsigned ValueReg,
519  bool IsKill,
520  unsigned ScratchRsrcReg,
521  unsigned ScratchOffsetReg,
522  int64_t InstOffset,
523  MachineMemOperand *MMO,
524  RegScavenger *RS) const {
525  MachineBasicBlock *MBB = MI->getParent();
526  MachineFunction *MF = MI->getParent()->getParent();
527  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
528  const SIInstrInfo *TII = ST.getInstrInfo();
529  const MachineFrameInfo &MFI = MF->getFrameInfo();
530 
531  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
532  const DebugLoc &DL = MI->getDebugLoc();
533  bool IsStore = Desc.mayStore();
534 
535  bool RanOutOfSGPRs = false;
536  bool Scavenged = false;
537  unsigned SOffset = ScratchOffsetReg;
538 
539  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
540  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
541  unsigned Size = NumSubRegs * 4;
542  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
543  const int64_t OriginalImmOffset = Offset;
544 
545  unsigned Align = MFI.getObjectAlignment(Index);
546  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
547 
548  if (!isUInt<12>(Offset + Size)) {
549  SOffset = AMDGPU::NoRegister;
550 
551  // We don't have access to the register scavenger if this function is called
552  // during PEI::scavengeFrameVirtualRegs().
553  if (RS)
554  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
555 
556  if (SOffset == AMDGPU::NoRegister) {
557  // There are no free SGPRs, and we are in the process of spilling
558  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
559  // on SI/CI, and on VI it is true until we implement spilling using scalar
560  // stores), we have no way to free up an SGPR. Our solution here is to
561  // add the offset directly to the ScratchOffset register, and then
562  // subtract the offset after the spill to return ScratchOffset to its
563  // original value.
564  RanOutOfSGPRs = true;
565  SOffset = ScratchOffsetReg;
566  } else {
567  Scavenged = true;
568  }
569 
570  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
571  .addReg(ScratchOffsetReg)
572  .addImm(Offset);
573 
574  Offset = 0;
575  }
576 
577  const unsigned EltSize = 4;
578 
579  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
580  unsigned SubReg = NumSubRegs == 1 ?
581  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
582 
583  unsigned SOffsetRegState = 0;
584  unsigned SrcDstRegState = getDefRegState(!IsStore);
585  if (i + 1 == e) {
586  SOffsetRegState |= getKillRegState(Scavenged);
587  // The last implicit use carries the "Kill" flag.
588  SrcDstRegState |= getKillRegState(IsKill);
589  }
590 
591  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
592  MachineMemOperand *NewMMO
593  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
594  EltSize, MinAlign(Align, EltSize * i));
595 
596  auto MIB = BuildMI(*MBB, MI, DL, Desc)
597  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
598  .addReg(ScratchRsrcReg)
599  .addReg(SOffset, SOffsetRegState)
600  .addImm(Offset)
601  .addImm(0) // glc
602  .addImm(0) // slc
603  .addImm(0) // tfe
604  .addMemOperand(NewMMO);
605 
606  if (NumSubRegs > 1)
607  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
608  }
609 
610  if (RanOutOfSGPRs) {
611  // Subtract the offset we added to the ScratchOffset register.
612  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
613  .addReg(ScratchOffsetReg)
614  .addImm(OriginalImmOffset);
615  }
616 }
617 
618 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
619  bool Store) {
620  if (SuperRegSize % 16 == 0) {
621  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
622  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
623  }
624 
625  if (SuperRegSize % 8 == 0) {
626  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
627  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
628  }
629 
630  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
631  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
632 }
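 // For example, when spilling via SMEM: a 512-bit SGPR tuple is 64 bytes and
 // is written as four 16-byte S_BUFFER_STORE_DWORDX4_SGPR pieces, a 64-bit
 // pair becomes one 8-byte DWORDX2 access, and a single SGPR falls back to
 // the 4-byte DWORD form.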
633 
634 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
635  int Index,
636  RegScavenger *RS,
637  bool OnlyToVGPR) const {
638  MachineBasicBlock *MBB = MI->getParent();
639  MachineFunction *MF = MBB->getParent();
640  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
641 
642  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
643  = MFI->getSGPRToVGPRSpills(Index);
644  bool SpillToVGPR = !VGPRSpills.empty();
645  if (OnlyToVGPR && !SpillToVGPR)
646  return false;
647 
648  MachineRegisterInfo &MRI = MF->getRegInfo();
649  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
650  const SIInstrInfo *TII = ST.getInstrInfo();
651 
652  unsigned SuperReg = MI->getOperand(0).getReg();
653  bool IsKill = MI->getOperand(0).isKill();
654  const DebugLoc &DL = MI->getDebugLoc();
655 
656  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
657 
658  bool SpillToSMEM = spillSGPRToSMEM();
659  if (SpillToSMEM && OnlyToVGPR)
660  return false;
661 
662  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
663 
664  unsigned OffsetReg = AMDGPU::M0;
665  unsigned M0CopyReg = AMDGPU::NoRegister;
666 
667  if (SpillToSMEM) {
668  if (RS->isRegUsed(AMDGPU::M0)) {
669  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
670  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
671  .addReg(AMDGPU::M0);
672  }
673  }
674 
675  unsigned ScalarStoreOp;
676  unsigned EltSize = 4;
677  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
678  if (SpillToSMEM && isSGPRClass(RC)) {
679  // XXX - if private_element_size is larger than 4 it might be useful to be
680  // able to spill wider vmem spills.
681  std::tie(EltSize, ScalarStoreOp) =
682  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
683  }
684 
685  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
686  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
687 
688  // SubReg carries the "Kill" flag when SubReg == SuperReg.
689  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
690  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
691  unsigned SubReg = NumSubRegs == 1 ?
692  SuperReg : getSubReg(SuperReg, SplitParts[i]);
693 
694  if (SpillToSMEM) {
695  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
696 
697  // The allocated memory size is really the wavefront size * the frame
698  // index size. The widest register class is 64 bytes, so a 4-byte scratch
699  // allocation is enough to spill this in a single stack object.
700  //
701  // FIXME: Frame size/offsets are computed earlier than this, so the extra
702  // space is still unnecessarily allocated.
703 
704  unsigned Align = FrameInfo.getObjectAlignment(Index);
705  MachinePointerInfo PtrInfo
706  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
707  MachineMemOperand *MMO
708  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
709  EltSize, MinAlign(Align, EltSize * i));
710 
711  // SMEM instructions only support a single offset, so increment the wave
712  // offset.
713 
714  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
715  if (Offset != 0) {
716  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
717  .addReg(MFI->getFrameOffsetReg())
718  .addImm(Offset);
719  } else {
720  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
721  .addReg(MFI->getFrameOffsetReg());
722  }
723 
724  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
725  .addReg(SubReg, getKillRegState(IsKill)) // sdata
726  .addReg(MFI->getScratchRSrcReg()) // sbase
727  .addReg(OffsetReg, RegState::Kill) // soff
728  .addImm(0) // glc
729  .addMemOperand(MMO);
730 
731  continue;
732  }
733 
734  if (SpillToVGPR) {
735  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
736 
737  BuildMI(*MBB, MI, DL,
738  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
739  Spill.VGPR)
740  .addReg(SubReg, getKillRegState(IsKill))
741  .addImm(Spill.Lane);
742 
743  // FIXME: Since this spills to another register instead of an actual
744  // frame index, we should delete the frame index when all references to
745  // it are fixed.
746  } else {
747  // XXX - Can the spill to a VGPR fail for some subregisters but not others?
748  if (OnlyToVGPR)
749  return false;
750 
751  // Spill SGPR to a frame index.
752  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
753  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
755 
756  MachineInstrBuilder Mov
757  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
758  .addReg(SubReg, SubKillState);
759 
760 
761  // There could be undef components of a spilled super register.
762  // TODO: Can we detect this and skip the spill?
763  if (NumSubRegs > 1) {
764  // The last implicit use of the SuperReg carries the "Kill" flag.
765  unsigned SuperKillState = 0;
766  if (i + 1 == e)
767  SuperKillState |= getKillRegState(IsKill);
768  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
769  }
770 
771  unsigned Align = FrameInfo.getObjectAlignment(Index);
772  MachinePointerInfo PtrInfo
773  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
774  MachineMemOperand *MMO
775  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
776  EltSize, MinAlign(Align, EltSize * i));
777  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
778  .addReg(TmpReg, RegState::Kill) // src
779  .addFrameIndex(Index) // vaddr
780  .addReg(MFI->getScratchRSrcReg()) // srsrc
781  .addReg(MFI->getFrameOffsetReg()) // soffset
782  .addImm(i * 4) // offset
783  .addMemOperand(MMO);
784  }
785  }
786 
787  if (M0CopyReg != AMDGPU::NoRegister) {
788  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
789  .addReg(M0CopyReg, RegState::Kill);
790  }
791 
792  MI->eraseFromParent();
793  MFI->addToSpilledSGPRs(NumSubRegs);
794  return true;
795 }
796 
797 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
798  int Index,
799  RegScavenger *RS,
800  bool OnlyToVGPR) const {
801  MachineFunction *MF = MI->getParent()->getParent();
802  MachineRegisterInfo &MRI = MF->getRegInfo();
803  MachineBasicBlock *MBB = MI->getParent();
804  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
805 
806  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
807  = MFI->getSGPRToVGPRSpills(Index);
808  bool SpillToVGPR = !VGPRSpills.empty();
809  if (OnlyToVGPR && !SpillToVGPR)
810  return false;
811 
812  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
813  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
814  const SIInstrInfo *TII = ST.getInstrInfo();
815  const DebugLoc &DL = MI->getDebugLoc();
816 
817  unsigned SuperReg = MI->getOperand(0).getReg();
818  bool SpillToSMEM = spillSGPRToSMEM();
819  if (SpillToSMEM && OnlyToVGPR)
820  return false;
821 
822  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
823 
824  unsigned OffsetReg = AMDGPU::M0;
825  unsigned M0CopyReg = AMDGPU::NoRegister;
826 
827  if (SpillToSMEM) {
828  if (RS->isRegUsed(AMDGPU::M0)) {
829  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
830  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
831  .addReg(AMDGPU::M0);
832  }
833  }
834 
835  unsigned EltSize = 4;
836  unsigned ScalarLoadOp;
837 
838  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
839  if (SpillToSMEM && isSGPRClass(RC)) {
840  // XXX - if private_element_size is larger than 4 it might be useful to be
841  // able to spill wider vmem spills.
842  std::tie(EltSize, ScalarLoadOp) =
843  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
844  }
845 
846  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
847  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
848 
849  // SubReg carries the "Kill" flag when SubReg == SuperReg.
850  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
851 
852  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
853  unsigned SubReg = NumSubRegs == 1 ?
854  SuperReg : getSubReg(SuperReg, SplitParts[i]);
855 
856  if (SpillToSMEM) {
857  // FIXME: Size may be > 4 but the extra bytes are wasted.
858  unsigned Align = FrameInfo.getObjectAlignment(Index);
859  MachinePointerInfo PtrInfo
860  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
861  MachineMemOperand *MMO
862  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
863  EltSize, MinAlign(Align, EltSize * i));
864 
865  // Add i * 4 offset
866  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
867  if (Offset != 0) {
868  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
869  .addReg(MFI->getFrameOffsetReg())
870  .addImm(Offset);
871  } else {
872  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
873  .addReg(MFI->getFrameOffsetReg());
874  }
875 
876  auto MIB =
877  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
878  .addReg(MFI->getScratchRSrcReg()) // sbase
879  .addReg(OffsetReg, RegState::Kill) // soff
880  .addImm(0) // glc
881  .addMemOperand(MMO);
882 
883  if (NumSubRegs > 1)
884  MIB.addReg(SuperReg, RegState::ImplicitDefine);
885 
886  continue;
887  }
888 
889  if (SpillToVGPR) {
890  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
891  auto MIB =
892  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
893  SubReg)
894  .addReg(Spill.VGPR)
895  .addImm(Spill.Lane);
896 
897  if (NumSubRegs > 1)
898  MIB.addReg(SuperReg, RegState::ImplicitDefine);
899  } else {
900  if (OnlyToVGPR)
901  return false;
902 
903  // Restore SGPR from a stack slot.
904  // FIXME: We should use S_LOAD_DWORD here for VI.
905  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
906  unsigned Align = FrameInfo.getObjectAlignment(Index);
907 
908  MachinePointerInfo PtrInfo
909  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
910 
911  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
912  MachineMemOperand::MOLoad, EltSize,
913  MinAlign(Align, EltSize * i));
914 
915  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
916  .addFrameIndex(Index) // vaddr
917  .addReg(MFI->getScratchRSrcReg()) // srsrc
918  .addReg(MFI->getFrameOffsetReg()) // soffset
919  .addImm(i * 4) // offset
920  .addMemOperand(MMO);
921 
922  auto MIB =
923  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
924  .addReg(TmpReg, RegState::Kill);
925 
926  if (NumSubRegs > 1)
927  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
928  }
929  }
930 
931  if (M0CopyReg != AMDGPU::NoRegister) {
932  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
933  .addReg(M0CopyReg, RegState::Kill);
934  }
935 
936  MI->eraseFromParent();
937  return true;
938 }
939 
940 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
941 /// a VGPR and the stack slot can be safely eliminated when all other users are
942 /// handled.
943 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
944  MachineBasicBlock::iterator MI,
945  int FI,
946  RegScavenger *RS) const {
947  switch (MI->getOpcode()) {
948  case AMDGPU::SI_SPILL_S512_SAVE:
949  case AMDGPU::SI_SPILL_S256_SAVE:
950  case AMDGPU::SI_SPILL_S128_SAVE:
951  case AMDGPU::SI_SPILL_S64_SAVE:
952  case AMDGPU::SI_SPILL_S32_SAVE:
953  return spillSGPR(MI, FI, RS, true);
954  case AMDGPU::SI_SPILL_S512_RESTORE:
955  case AMDGPU::SI_SPILL_S256_RESTORE:
956  case AMDGPU::SI_SPILL_S128_RESTORE:
957  case AMDGPU::SI_SPILL_S64_RESTORE:
958  case AMDGPU::SI_SPILL_S32_RESTORE:
959  return restoreSGPR(MI, FI, RS, true);
960  default:
961  llvm_unreachable("not an SGPR spill instruction");
962  }
963 }
964 
965 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
966  int SPAdj, unsigned FIOperandNum,
967  RegScavenger *RS) const {
968  MachineFunction *MF = MI->getParent()->getParent();
969  MachineRegisterInfo &MRI = MF->getRegInfo();
970  MachineBasicBlock *MBB = MI->getParent();
971  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
972  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
973  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
974  const SIInstrInfo *TII = ST.getInstrInfo();
975  DebugLoc DL = MI->getDebugLoc();
976 
977  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
978  int Index = MI->getOperand(FIOperandNum).getIndex();
979 
980  switch (MI->getOpcode()) {
981  // SGPR register spill
982  case AMDGPU::SI_SPILL_S512_SAVE:
983  case AMDGPU::SI_SPILL_S256_SAVE:
984  case AMDGPU::SI_SPILL_S128_SAVE:
985  case AMDGPU::SI_SPILL_S64_SAVE:
986  case AMDGPU::SI_SPILL_S32_SAVE: {
987  spillSGPR(MI, Index, RS);
988  break;
989  }
990 
991  // SGPR register restore
992  case AMDGPU::SI_SPILL_S512_RESTORE:
993  case AMDGPU::SI_SPILL_S256_RESTORE:
994  case AMDGPU::SI_SPILL_S128_RESTORE:
995  case AMDGPU::SI_SPILL_S64_RESTORE:
996  case AMDGPU::SI_SPILL_S32_RESTORE: {
997  restoreSGPR(MI, Index, RS);
998  break;
999  }
1000 
1001  // VGPR register spill
1002  case AMDGPU::SI_SPILL_V512_SAVE:
1003  case AMDGPU::SI_SPILL_V256_SAVE:
1004  case AMDGPU::SI_SPILL_V128_SAVE:
1005  case AMDGPU::SI_SPILL_V96_SAVE:
1006  case AMDGPU::SI_SPILL_V64_SAVE:
1007  case AMDGPU::SI_SPILL_V32_SAVE: {
1008  const MachineOperand *VData = TII->getNamedOperand(*MI,
1009  AMDGPU::OpName::vdata);
1010  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1011  Index,
1012  VData->getReg(), VData->isKill(),
1013  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1014  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1015  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1016  *MI->memoperands_begin(),
1017  RS);
1018  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1019  MI->eraseFromParent();
1020  break;
1021  }
1022  case AMDGPU::SI_SPILL_V32_RESTORE:
1023  case AMDGPU::SI_SPILL_V64_RESTORE:
1024  case AMDGPU::SI_SPILL_V96_RESTORE:
1025  case AMDGPU::SI_SPILL_V128_RESTORE:
1026  case AMDGPU::SI_SPILL_V256_RESTORE:
1027  case AMDGPU::SI_SPILL_V512_RESTORE: {
1028  const MachineOperand *VData = TII->getNamedOperand(*MI,
1029  AMDGPU::OpName::vdata);
1030 
1031  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1032  Index,
1033  VData->getReg(), VData->isKill(),
1034  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1035  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1036  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1037  *MI->memoperands_begin(),
1038  RS);
1039  MI->eraseFromParent();
1040  break;
1041  }
1042 
1043  default: {
1044  const DebugLoc &DL = MI->getDebugLoc();
1045  bool IsMUBUF = TII->isMUBUF(*MI);
1046 
1047  if (!IsMUBUF &&
1048  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1049  // Convert to an absolute stack address by finding the offset from the
1050  // scratch wave base and scaling by the wave size.
1051  //
1052  // In an entry function/kernel the stack address is already the absolute
1053  // address relative to the scratch wave offset.
1054 
1055  unsigned DiffReg
1056  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1057 
1058  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1059  unsigned ResultReg = IsCopy ?
1060  MI->getOperand(0).getReg() :
1061  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1062 
1063  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1064  .addReg(MFI->getFrameOffsetReg())
1065  .addReg(MFI->getScratchWaveOffsetReg());
1066 
1067  int64_t Offset = FrameInfo.getObjectOffset(Index);
1068  if (Offset == 0) {
1069  // XXX - This never happens because of the emergency scavenging slot at 0?
1070  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1071  .addImm(Log2_32(ST.getWavefrontSize()))
1072  .addReg(DiffReg);
1073  } else {
1074  unsigned CarryOut
1075  = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
1076  unsigned ScaledReg
1077  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1078 
1079  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1080  .addImm(Log2_32(ST.getWavefrontSize()))
1081  .addReg(DiffReg, RegState::Kill);
1082 
1083  // TODO: Fold if use instruction is another add of a constant.
1084  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1085  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
1086  .addReg(CarryOut, RegState::Define | RegState::Dead)
1087  .addImm(Offset)
1088  .addReg(ScaledReg, RegState::Kill);
1089  } else {
1090  unsigned ConstOffsetReg
1091  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1092 
1093  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1094  .addImm(Offset);
1095  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
1096  .addReg(CarryOut, RegState::Define | RegState::Dead)
1097  .addReg(ConstOffsetReg, RegState::Kill)
1098  .addReg(ScaledReg, RegState::Kill);
1099  }
1100 
1101  MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
1102  }
1103 
1104  // Don't introduce an extra copy if we're just materializing in a mov.
1105  if (IsCopy)
1106  MI->eraseFromParent();
1107  else
1108  FIOp.ChangeToRegister(ResultReg, false, false, true);
1109  return;
1110  }
1111 
1112  if (IsMUBUF) {
1113  // Disable offen so we don't need a 0 vgpr base.
1114  assert(static_cast<int>(FIOperandNum) ==
1115  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1116  AMDGPU::OpName::vaddr));
1117 
1118  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1119  == MFI->getFrameOffsetReg());
1120 
1121  int64_t Offset = FrameInfo.getObjectOffset(Index);
1122  int64_t OldImm
1123  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1124  int64_t NewOffset = OldImm + Offset;
1125 
1126  if (isUInt<12>(NewOffset) &&
1127  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1128  MI->eraseFromParent();
1129  return;
1130  }
1131  }
1132 
1133  // If the offset is simply too big, don't convert to a scratch wave offset
1134  // relative index.
1135 
1136  int64_t Offset = FrameInfo.getObjectOffset(Index);
1137  FIOp.ChangeToImmediate(Offset);
1138  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1139  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1140  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1141  .addImm(Offset);
1142  FIOp.ChangeToRegister(TmpReg, false, false, true);
1143  }
1144  }
1145  }
1146 }
1147 
1148 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1149  #define AMDGPU_REG_ASM_NAMES
1150  #include "AMDGPURegAsmNames.inc.cpp"
1151 
1152  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1153  if (Reg >= BeginReg && Reg <= EndReg) { \
1154  unsigned Index = Reg - BeginReg; \
1155  assert(Index < array_lengthof(RegTable)); \
1156  return RegTable[Index]; \
1157  }
1158 
1159  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1160  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1161  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1162  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1163  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1164  VGPR96RegNames);
1165 
1166  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1167  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1168  VGPR128RegNames);
1169  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1170  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1171  SGPR128RegNames);
1172 
1173  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1174  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1175  VGPR256RegNames);
1176 
1177  REG_RANGE(
1178  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1179  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1180  VGPR512RegNames);
1181 
1182  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1183  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1184  SGPR256RegNames);
1185 
1186  REG_RANGE(
1187  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1188  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1189  SGPR512RegNames
1190  );
1191 
1192 #undef REG_RANGE
1193 
1194  // FIXME: Rename flat_scr so we don't need to special case this.
1195  switch (Reg) {
1196  case AMDGPU::FLAT_SCR:
1197  return "flat_scratch";
1198  case AMDGPU::FLAT_SCR_LO:
1199  return "flat_scratch_lo";
1200  case AMDGPU::FLAT_SCR_HI:
1201  return "flat_scratch_hi";
1202  default:
1203  // For the special named registers the default is fine.
1204  return TargetRegisterInfo::getRegAsmName(Reg);
1205  }
1206 }
1207 
1208 // FIXME: This is very slow. It might be worth creating a map from physreg to
1209 // register class.
1210 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1211  assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1212 
1213  static const TargetRegisterClass *const BaseClasses[] = {
1214  &AMDGPU::VGPR_32RegClass,
1215  &AMDGPU::SReg_32RegClass,
1216  &AMDGPU::VReg_64RegClass,
1217  &AMDGPU::SReg_64RegClass,
1218  &AMDGPU::VReg_96RegClass,
1219  &AMDGPU::VReg_128RegClass,
1220  &AMDGPU::SReg_128RegClass,
1221  &AMDGPU::VReg_256RegClass,
1222  &AMDGPU::SReg_256RegClass,
1223  &AMDGPU::VReg_512RegClass,
1224  &AMDGPU::SReg_512RegClass,
1225  &AMDGPU::SCC_CLASSRegClass,
1226  };
1227 
1228  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1229  if (BaseClass->contains(Reg)) {
1230  return BaseClass;
1231  }
1232  }
1233  return nullptr;
1234 }
1235 
1236 // TODO: It might be helpful to have some target specific flags in
1237 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1238 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1239  unsigned Size = getRegSizeInBits(*RC);
1240  if (Size < 32)
1241  return false;
1242  switch (Size) {
1243  case 32:
1244  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1245  case 64:
1246  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1247  case 96:
1248  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1249  case 128:
1250  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1251  case 256:
1252  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1253  case 512:
1254  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1255  default:
1256  llvm_unreachable("Invalid register class size");
1257  }
1258 }
1259 
1260 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1261  const TargetRegisterClass *SRC) const {
1262  switch (getRegSizeInBits(*SRC)) {
1263  case 32:
1264  return &AMDGPU::VGPR_32RegClass;
1265  case 64:
1266  return &AMDGPU::VReg_64RegClass;
1267  case 96:
1268  return &AMDGPU::VReg_96RegClass;
1269  case 128:
1270  return &AMDGPU::VReg_128RegClass;
1271  case 256:
1272  return &AMDGPU::VReg_256RegClass;
1273  case 512:
1274  return &AMDGPU::VReg_512RegClass;
1275  default:
1276  llvm_unreachable("Invalid register class size");
1277  }
1278 }
1279 
1280 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1281  const TargetRegisterClass *VRC) const {
1282  switch (getRegSizeInBits(*VRC)) {
1283  case 32:
1284  return &AMDGPU::SGPR_32RegClass;
1285  case 64:
1286  return &AMDGPU::SReg_64RegClass;
1287  case 128:
1288  return &AMDGPU::SReg_128RegClass;
1289  case 256:
1290  return &AMDGPU::SReg_256RegClass;
1291  case 512:
1292  return &AMDGPU::SReg_512RegClass;
1293  default:
1294  llvm_unreachable("Invalid register class size");
1295  }
1296 }
1297 
1298 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1299  const TargetRegisterClass *RC, unsigned SubIdx) const {
1300  if (SubIdx == AMDGPU::NoSubRegister)
1301  return RC;
1302 
1303  // We can assume that each lane corresponds to one 32-bit register.
1304  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
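 // For instance, sub0_sub1 covers two 32-bit lanes, so requesting it from an
 // SGPR tuple yields SReg_64, while the three-lane sub0_sub1_sub2 of a VGPR
 // tuple yields VReg_96.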
1305  if (isSGPRClass(RC)) {
1306  switch (Count) {
1307  case 1:
1308  return &AMDGPU::SGPR_32RegClass;
1309  case 2:
1310  return &AMDGPU::SReg_64RegClass;
1311  case 4:
1312  return &AMDGPU::SReg_128RegClass;
1313  case 8:
1314  return &AMDGPU::SReg_256RegClass;
1315  case 16: /* fall-through */
1316  default:
1317  llvm_unreachable("Invalid sub-register class size");
1318  }
1319  } else {
1320  switch (Count) {
1321  case 1:
1322  return &AMDGPU::VGPR_32RegClass;
1323  case 2:
1324  return &AMDGPU::VReg_64RegClass;
1325  case 3:
1326  return &AMDGPU::VReg_96RegClass;
1327  case 4:
1328  return &AMDGPU::VReg_128RegClass;
1329  case 8:
1330  return &AMDGPU::VReg_256RegClass;
1331  case 16: /* fall-through */
1332  default:
1333  llvm_unreachable("Invalid sub-register class size");
1334  }
1335  }
1336 }
1337 
1338 bool SIRegisterInfo::shouldRewriteCopySrc(
1339  const TargetRegisterClass *DefRC,
1340  unsigned DefSubReg,
1341  const TargetRegisterClass *SrcRC,
1342  unsigned SrcSubReg) const {
1343  // We want to prefer the smallest register class possible, so we don't want to
1344  // stop and rewrite on anything that looks like a subregister
1345  // extract. Operations mostly don't care about the super register class, so we
1346  // only want to stop on the most basic of copies between the same register
1347  // class.
1348  //
1349  // e.g. if we have something like
1350  // vreg0 = ...
1351  // vreg1 = ...
1352  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
1353  // vreg3 = COPY vreg2, sub0
1354  //
1355  // We want to look through the COPY to find:
1356  // => vreg3 = COPY vreg0
1357 
1358  // Plain copy.
1359  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1360 }
1361 
1362 /// \brief Returns a register that is not used at any point in the function.
1363 /// If all registers are used, then this function will return
1364 /// AMDGPU::NoRegister.
1365 unsigned
1366 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1367  const TargetRegisterClass *RC,
1368  const MachineFunction &MF) const {
1369 
1370  for (unsigned Reg : *RC)
1371  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1372  return Reg;
1373  return AMDGPU::NoRegister;
1374 }
1375 
1376 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1377  unsigned EltSize) const {
1378  if (EltSize == 4) {
1379  static const int16_t Sub0_15[] = {
1380  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1381  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1382  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1383  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1384  };
1385 
1386  static const int16_t Sub0_7[] = {
1387  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1388  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1389  };
1390 
1391  static const int16_t Sub0_3[] = {
1392  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1393  };
1394 
1395  static const int16_t Sub0_2[] = {
1396  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1397  };
1398 
1399  static const int16_t Sub0_1[] = {
1400  AMDGPU::sub0, AMDGPU::sub1,
1401  };
1402 
1403  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1404  case 32:
1405  return {};
1406  case 64:
1407  return makeArrayRef(Sub0_1);
1408  case 96:
1409  return makeArrayRef(Sub0_2);
1410  case 128:
1411  return makeArrayRef(Sub0_3);
1412  case 256:
1413  return makeArrayRef(Sub0_7);
1414  case 512:
1415  return makeArrayRef(Sub0_15);
1416  default:
1417  llvm_unreachable("unhandled register size");
1418  }
1419  }
1420 
1421  if (EltSize == 8) {
1422  static const int16_t Sub0_15_64[] = {
1423  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1424  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1425  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1426  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1427  };
1428 
1429  static const int16_t Sub0_7_64[] = {
1430  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1431  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1432  };
1433 
1434 
1435  static const int16_t Sub0_3_64[] = {
1436  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1437  };
1438 
1439  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1440  case 64:
1441  return {};
1442  case 128:
1443  return makeArrayRef(Sub0_3_64);
1444  case 256:
1445  return makeArrayRef(Sub0_7_64);
1446  case 512:
1447  return makeArrayRef(Sub0_15_64);
1448  default:
1449  llvm_unreachable("unhandled register size");
1450  }
1451  }
1452 
1453  assert(EltSize == 16 && "unhandled register spill split size");
1454 
1455  static const int16_t Sub0_15_128[] = {
1456  AMDGPU::sub0_sub1_sub2_sub3,
1457  AMDGPU::sub4_sub5_sub6_sub7,
1458  AMDGPU::sub8_sub9_sub10_sub11,
1459  AMDGPU::sub12_sub13_sub14_sub15
1460  };
1461 
1462  static const int16_t Sub0_7_128[] = {
1463  AMDGPU::sub0_sub1_sub2_sub3,
1464  AMDGPU::sub4_sub5_sub6_sub7
1465  };
1466 
1467  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1468  case 128:
1469  return {};
1470  case 256:
1471  return makeArrayRef(Sub0_7_128);
1472  case 512:
1473  return makeArrayRef(Sub0_15_128);
1474  default:
1475  llvm_unreachable("unhandled register size");
1476  }
1477 }
1478 
1479 const TargetRegisterClass*
1480 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1481  unsigned Reg) const {
1482  if (TargetRegisterInfo::isVirtualRegister(Reg))
1483  return MRI.getRegClass(Reg);
1484 
1485  return getPhysRegClass(Reg);
1486 }
1487 
1488 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1489  unsigned Reg) const {
1490  return hasVGPRs(getRegClassForReg(MRI, Reg));
1491 }
1492 
1493 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1494  const TargetRegisterClass *SrcRC,
1495  unsigned SubReg,
1496  const TargetRegisterClass *DstRC,
1497  unsigned DstSubReg,
1498  const TargetRegisterClass *NewRC,
1499  LiveIntervals &LIS) const {
1500  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1501  unsigned DstSize = getRegSizeInBits(*DstRC);
1502  unsigned NewSize = getRegSizeInBits(*NewRC);
1503 
1504  // Do not increase the size of registers beyond a dword; we would need to
1505  // allocate adjacent registers and constrain regalloc more than needed.
1506 
1507  // Always allow dword coalescing.
1508  if (SrcSize <= 32 || DstSize <= 32)
1509  return true;
1510 
1511  return NewSize <= DstSize || NewSize <= SrcSize;
1512 }
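 // Illustrative consequence of the checks above: coalescing that would merge
 // two 64-bit virtual registers into a single VReg_128 is rejected (a NewSize
 // of 128 exceeds both the 64-bit source and destination sizes), while folding
 // a 32-bit copy into any wider register is always allowed.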
1513 
1514 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1515  MachineFunction &MF) const {
1516 
1517  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1518  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1519 
1520  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1521  *MF.getFunction());
1522  switch (RC->getID()) {
1523  default:
1524  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1525  case AMDGPU::VGPR_32RegClassID:
1526  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1527  case AMDGPU::SGPR_32RegClassID:
1528  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1529  }
1530 }
1531 
1532 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1533  unsigned Idx) const {
1534  if (Idx == getVGPRPressureSet())
1535  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1536  const_cast<MachineFunction &>(MF));
1537 
1538  if (Idx == getSGPRPressureSet())
1539  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1540  const_cast<MachineFunction &>(MF));
1541 
1542  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1543 }
1544 
1545 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1546  static const int Empty[] = { -1 };
1547 
1548  if (hasRegUnit(AMDGPU::M0, RegUnit))
1549  return Empty;
1550  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1551 }
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const override
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:398
Interface definition for SIRegisterInfo.
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
SIRegisterInfo(const SISubtarget &ST)
AMDGPU specific subclass of TargetSubtarget.
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
bool isRegUsed(unsigned Reg, bool includeReserved=true) const
Return if a specific register is currently used.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static int getOffsetMUBUFLoad(unsigned Opc)
StringRef getRegAsmName(unsigned Reg) const override
bool hasStackObjects() const
Return true if there are any stack objects in this function.
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static std::pair< unsigned, unsigned > getSpillEltSize(unsigned SuperRegSize, bool Store)
const SIInstrInfo * getInstrInfo() const override
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
Definition: MachineInstr.h:649
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount)
A debug info location.
Definition: DebugLoc.h:34
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const
Find an unused register of the specified register class.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:332
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override
static unsigned getNumSubRegsForSpillOp(unsigned Op)
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
unsigned getSubRegFromChannel(unsigned Channel) const
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:710
A description of a memory reference used in the backend.
bool hasInv2PiInlineImm() const
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
unsigned SubReg
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Reg
All possible values of the reg field in the ModR/M byte.
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:395
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isSGPRPressureSet(unsigned SetID) const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if VGPR lanes have already been reserved for this frame index.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
#define REG_RANGE(BeginReg, EndReg, RegTable)
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets. Return the minimum alignment that may be assumed after adding the two together.
Definition: MathExtras.h:602
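
A standalone sketch of MinAlign; it is constexpr, so static_assert can check the arithmetic at compile time. The byte counts are arbitrary examples.

#include "llvm/Support/MathExtras.h"

// After adding a 24-byte offset to a 16-byte aligned base, only 8-byte
// alignment can still be assumed; a zero offset keeps the full alignment.
static_assert(llvm::MinAlign(16, 24) == 8, "offset reduces the alignment");
static_assert(llvm::MinAlign(16, 0) == 16, "zero offset keeps base alignment");

int main() { return 0; }
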
bool hasScalarStores() const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
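
A hedged sketch of the BuildMI pattern used by the spill lowering in this file; materializeOffset and all of its parameters are assumptions of the example.

#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Hypothetical helper: materialize an immediate byte offset into a VGPR in
// front of the instruction at MI.
static void materializeOffset(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, const SIInstrInfo *TII,
                              unsigned TmpVGPR, int64_t Offset) {
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
      .addImm(Offset);
}
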
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
MCRegAliasIterator enumerates all registers aliasing Reg.
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
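
A hedged sketch of the two per-function lookups used throughout this file; canUseScalarStores is a hypothetical helper, not part of the documented interface.

#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"

using namespace llvm;

// Hypothetical helper: the subtarget answers "what does this GPU support?",
// while SIMachineFunctionInfo carries per-function AMDGPU state such as the
// reserved scratch resource register.
static bool canUseScalarStores(const MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  (void)MFI; // e.g. MFI->getScratchRSrcReg() when building a spill
  return ST.hasScalarStores();
}
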
unsigned getSGPRPressureSet() const
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
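
A standalone sketch of the usual llvm_unreachable pattern in a covered switch; the SpillKind enum is illustrative only and not taken from this file.

#include "llvm/Support/ErrorHandling.h"

enum class SpillKind { ToVGPR, ToSMEM }; // illustrative only

static const char *describe(SpillKind K) {
  switch (K) {
  case SpillKind::ToVGPR:
    return "spill to VGPR lanes";
  case SpillKind::ToSMEM:
    return "spill via scalar stores";
  }
  // Documents (and lets the optimizer assume) that no other value occurs.
  llvm_unreachable("unhandled SpillKind");
}

int main() { return describe(SpillKind::ToVGPR) != nullptr ? 0 : 1; }
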
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register...
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
int64_t getImm() const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
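
A standalone sketch of Log2_32; the zero case wraps to the all-ones unsigned value, which the comment above describes as -1.

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::Log2_32(32) == 5);  // exact power of two
  assert(llvm::Log2_32(33) == 5);  // floor of the log
  assert(llvm::Log2_32(0) == ~0u); // the documented -1, as an unsigned value
  return 0;
}
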
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:393
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Representation of each machine instruction.
Definition: MachineInstr.h:59
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value,.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
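
A hedged sketch of the operand-building chain used by the scratch store helpers; emitScratchStore and its operand order are illustrative only (the real MUBUF stores in this file append further immediate operands such as glc/slc/tfe).

#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Hypothetical helper: assemble a store of VData to a scratch slot.
static void emitScratchStore(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             const MCInstrDesc &Desc, unsigned VData,
                             unsigned RsrcReg, unsigned OffsetReg,
                             int64_t Offset, MachineMemOperand *MMO,
                             bool IsKill) {
  BuildMI(MBB, I, DL, Desc)
      .addReg(VData, getKillRegState(IsKill)) // value being stored
      .addReg(RsrcReg)                        // scratch resource descriptor
      .addReg(OffsetReg)                      // scratch wave byte offset
      .addImm(Offset)                         // immediate byte offset
      .addMemOperand(MMO);                    // describe the memory access
}
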
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
const unsigned Kind
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
IRTranslator LLVM IR -> MI
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
bool hasCalls() const
Return true if the current function has any function calls.