LLVM 4.0.0
SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "SIInstrInfo.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/RegisterScavenging.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/LLVMContext.h"
24 
25 using namespace llvm;
26 
27 static cl::opt<bool> EnableSpillSGPRToSMEM(
28   "amdgpu-spill-sgpr-to-smem",
29  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
30  cl::init(false));
31 
32 
33 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
34  for (unsigned i = 0; PSets[i] != -1; ++i) {
35  if (PSets[i] == (int)PSetID)
36  return true;
37  }
38  return false;
39 }
40 
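// Set the bit for PSetID in PressureSets if any register unit of Reg
// contributes to that pressure set.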
41 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
42  BitVector &PressureSets) const {
43  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
44  const int *PSets = getRegUnitPressureSets(*U);
45  if (hasPressureSet(PSets, PSetID)) {
46  PressureSets.set(PSetID);
47  break;
48  }
49  }
50 }
51 
52 SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
53   SGPRPressureSets(getNumRegPressureSets()),
54  VGPRPressureSets(getNumRegPressureSets()) {
55  unsigned NumRegPressureSets = getNumRegPressureSets();
56 
57  SGPRSetID = NumRegPressureSets;
58  VGPRSetID = NumRegPressureSets;
59 
60  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
61  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
62  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
63  }
64 
65  // Determine the number of reg units for each pressure set.
66  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
67  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
68  const int *PSets = getRegUnitPressureSets(i);
69  for (unsigned j = 0; PSets[j] != -1; ++j) {
70  ++PressureSetRegUnits[PSets[j]];
71  }
72  }
73 
74  unsigned VGPRMax = 0, SGPRMax = 0;
75  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
76  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
77  VGPRSetID = i;
78  VGPRMax = PressureSetRegUnits[i];
79  continue;
80  }
81  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
82  SGPRSetID = i;
83  SGPRMax = PressureSetRegUnits[i];
84  }
85  }
86 
87  assert(SGPRSetID < NumRegPressureSets &&
88  VGPRSetID < NumRegPressureSets);
89 }
90 
91 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
92  MCRegAliasIterator R(Reg, this, true);
93 
94  for (; R.isValid(); ++R)
95  Reserved.set(*R);
96 }
97 
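// Return the end register initially reserved for the scratch buffer in case
// spilling is needed.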
98 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
99   const MachineFunction &MF) const {
100  unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
101  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
102  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
103 }
104 
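// Return the end register initially reserved for the scratch wave offset in
// case spilling is needed.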
105 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
106   const MachineFunction &MF) const {
107  unsigned RegCount = getMaxNumSGPRs(MF);
108  unsigned Reg;
109 
110   // Try to place it in a hole after PrivateSegmentBufferReg.
111   if (RegCount & 3) {
112     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
113     // alignment constraints, so we have a hole where we can put the wave offset.
114  Reg = RegCount - 1;
115  } else {
116  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
117  // wave offset before it.
118  Reg = RegCount - 5;
119  }
120  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
121 }
122 
123 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
124   BitVector Reserved(getNumRegs());
125  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
126 
127   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
128   // this seems likely to result in bugs, so I'm marking them as reserved.
129  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
130  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
131 
132  // Reserve Trap Handler registers - support is not implemented in Codegen.
133  reserveRegisterTuples(Reserved, AMDGPU::TBA);
134  reserveRegisterTuples(Reserved, AMDGPU::TMA);
135  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
136  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
137  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
138  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
139  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
140  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
141 
142  unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
143  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
144  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
145  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
146  reserveRegisterTuples(Reserved, Reg);
147  }
148 
149  unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
150  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
151  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
152  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
153  reserveRegisterTuples(Reserved, Reg);
154  }
155 
156   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
157 
158  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
159  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
160  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
161  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
162  }
163 
164  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
165  if (ScratchRSrcReg != AMDGPU::NoRegister) {
166  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
167  // to spill.
168  // TODO: May need to reserve a VGPR if doing LDS spilling.
169  reserveRegisterTuples(Reserved, ScratchRSrcReg);
170  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
171  }
172 
173  return Reserved;
174 }
175 
176 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
177   return Fn.getFrameInfo().hasStackObjects();
178 }
179 
180 bool
181 SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
182   return MF.getFrameInfo().hasStackObjects();
183 }
184 
185 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
186   const MachineFunction &MF) const {
187  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
188  // create a virtual register for it during frame index elimination, so the
189  // scavenger is directly needed.
190  return MF.getFrameInfo().hasStackObjects() &&
191  MF.getSubtarget<SISubtarget>().hasScalarStores() &&
192  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
193 }
194 
195 bool SIRegisterInfo::requiresVirtualBaseRegisters(
196   const MachineFunction &) const {
197  // There are no special dedicated stack or frame pointers.
198  return true;
199 }
200 
201 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
202   // This helps catch bugs as verifier errors.
203  return true;
204 }
205 
206 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
207   assert(SIInstrInfo::isMUBUF(*MI));
208 
209  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
210  AMDGPU::OpName::offset);
211  return MI->getOperand(OffIdx).getImm();
212 }
213 
214 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
215   int Idx) const {
216  if (!SIInstrInfo::isMUBUF(*MI))
217  return 0;
218 
219   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
220     AMDGPU::OpName::vaddr) &&
221  "Should never see frame index on non-address operand");
222 
223  return getMUBUFInstrOffset(MI);
224 }
225 
226 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
227   if (!MI->mayLoadOrStore())
228  return false;
229 
230  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
231 
232  return !isUInt<12>(FullOffset);
233 }
234 
235 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
236   unsigned BaseReg,
237  int FrameIdx,
238  int64_t Offset) const {
239   MachineBasicBlock::iterator Ins = MBB->begin();
240   DebugLoc DL; // Defaults to "unknown"
241 
242  if (Ins != MBB->end())
243  DL = Ins->getDebugLoc();
244 
245  MachineFunction *MF = MBB->getParent();
246  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
247  const SIInstrInfo *TII = Subtarget.getInstrInfo();
248 
249  if (Offset == 0) {
250  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
251  .addFrameIndex(FrameIdx);
252  return;
253  }
254 
255   MachineRegisterInfo &MRI = MF->getRegInfo();
256   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
257  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
258 
259  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
260 
261  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
262  .addImm(Offset);
263  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
264  .addFrameIndex(FrameIdx);
265 
266  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
267  .addReg(UnusedCarry, RegState::Define | RegState::Dead)
268  .addReg(OffsetReg, RegState::Kill)
269  .addReg(FIReg);
270 }
271 
272 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
273   int64_t Offset) const {
274 
275   MachineBasicBlock *MBB = MI.getParent();
276   MachineFunction *MF = MBB->getParent();
277  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
278  const SIInstrInfo *TII = Subtarget.getInstrInfo();
279 
280 #ifndef NDEBUG
281  // FIXME: Is it possible to be storing a frame index to itself?
282  bool SeenFI = false;
283  for (const MachineOperand &MO: MI.operands()) {
284  if (MO.isFI()) {
285  if (SeenFI)
286  llvm_unreachable("should not see multiple frame indices");
287 
288  SeenFI = true;
289  }
290  }
291 #endif
292 
293  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
294  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
295 
296  assert(TII->isMUBUF(MI));
297 
298  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
299  int64_t NewOffset = OffsetOp->getImm() + Offset;
300  assert(isUInt<12>(NewOffset) && "offset should be legal");
301 
302  FIOp->ChangeToRegister(BaseReg, false);
303  OffsetOp->setImm(NewOffset);
304 }
305 
306 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
307   unsigned BaseReg,
308  int64_t Offset) const {
309  if (!SIInstrInfo::isMUBUF(*MI))
310  return false;
311 
312  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
313 
314  return isUInt<12>(NewOffset);
315 }
316 
317 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
318   const MachineFunction &MF, unsigned Kind) const {
319  // This is inaccurate. It depends on the instruction and address space. The
320  // only place where we should hit this is for dealing with frame indexes /
321  // private accesses, so this is correct in that case.
322  return &AMDGPU::VGPR_32RegClass;
323 }
324 
325 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
326 
327  switch (Op) {
328  case AMDGPU::SI_SPILL_S512_SAVE:
329  case AMDGPU::SI_SPILL_S512_RESTORE:
330  case AMDGPU::SI_SPILL_V512_SAVE:
331  case AMDGPU::SI_SPILL_V512_RESTORE:
332  return 16;
333  case AMDGPU::SI_SPILL_S256_SAVE:
334  case AMDGPU::SI_SPILL_S256_RESTORE:
335  case AMDGPU::SI_SPILL_V256_SAVE:
336  case AMDGPU::SI_SPILL_V256_RESTORE:
337  return 8;
338  case AMDGPU::SI_SPILL_S128_SAVE:
339  case AMDGPU::SI_SPILL_S128_RESTORE:
340  case AMDGPU::SI_SPILL_V128_SAVE:
341  case AMDGPU::SI_SPILL_V128_RESTORE:
342  return 4;
343  case AMDGPU::SI_SPILL_V96_SAVE:
344  case AMDGPU::SI_SPILL_V96_RESTORE:
345  return 3;
346  case AMDGPU::SI_SPILL_S64_SAVE:
347  case AMDGPU::SI_SPILL_S64_RESTORE:
348  case AMDGPU::SI_SPILL_V64_SAVE:
349  case AMDGPU::SI_SPILL_V64_RESTORE:
350  return 2;
351  case AMDGPU::SI_SPILL_S32_SAVE:
352  case AMDGPU::SI_SPILL_S32_RESTORE:
353  case AMDGPU::SI_SPILL_V32_SAVE:
354  case AMDGPU::SI_SPILL_V32_RESTORE:
355  return 1;
356  default: llvm_unreachable("Invalid spill opcode");
357  }
358 }
359 
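// Map an OFFEN MUBUF store opcode to its OFFSET (immediate-address-only) form,
// or return -1 if there is no such form.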
360 static int getOffsetMUBUFStore(unsigned Opc) {
361  switch (Opc) {
362  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
363  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
364  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
365  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
366  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
367  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
368  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
369  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
370  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
371  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
372  default:
373  return -1;
374  }
375 }
376 
377 static int getOffsetMUBUFLoad(unsigned Opc) {
378  switch (Opc) {
379  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
380  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
381  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
382  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
383  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
384  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
385  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
386  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
387  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
388  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
389  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
390  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
391  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
392  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
393  default:
394  return -1;
395  }
396 }
397 
398 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
399 // need to handle the case where an SGPR may need to be spilled while spilling.
400 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
401   MachineFrameInfo &MFI,
402   MachineBasicBlock::iterator MI,
403   int Index,
404  int64_t Offset) {
405  MachineBasicBlock *MBB = MI->getParent();
406  const DebugLoc &DL = MI->getDebugLoc();
407  bool IsStore = MI->mayStore();
408 
409  unsigned Opc = MI->getOpcode();
410   int LoadStoreOp = IsStore ?
411     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
412   if (LoadStoreOp == -1)
413  return false;
414 
415  unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
416 
417  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
418  .addReg(Reg, getDefRegState(!IsStore))
419  .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
420  .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
421  .addImm(Offset)
422  .addImm(0) // glc
423  .addImm(0) // slc
424  .addImm(0) // tfe
425  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
426  return true;
427 }
428 
429 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
430  unsigned LoadStoreOp,
431  int Index,
432  unsigned ValueReg,
433  bool IsKill,
434  unsigned ScratchRsrcReg,
435  unsigned ScratchOffsetReg,
436  int64_t InstOffset,
437  MachineMemOperand *MMO,
438  RegScavenger *RS) const {
439  MachineBasicBlock *MBB = MI->getParent();
440  MachineFunction *MF = MI->getParent()->getParent();
441  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
442  const SIInstrInfo *TII = ST.getInstrInfo();
443  const MachineFrameInfo &MFI = MF->getFrameInfo();
444 
445  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
446  const DebugLoc &DL = MI->getDebugLoc();
447  bool IsStore = Desc.mayStore();
448 
449  bool RanOutOfSGPRs = false;
450  bool Scavenged = false;
451  unsigned SOffset = ScratchOffsetReg;
452 
453  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
454  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
455  unsigned Size = NumSubRegs * 4;
456  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
457  const int64_t OriginalImmOffset = Offset;
458 
459  unsigned Align = MFI.getObjectAlignment(Index);
460  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
461 
462  if (!isUInt<12>(Offset + Size)) {
463  SOffset = AMDGPU::NoRegister;
464 
465  // We don't have access to the register scavenger if this function is called
466  // during PEI::scavengeFrameVirtualRegs().
467  if (RS)
468  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
469 
470  if (SOffset == AMDGPU::NoRegister) {
471       // There are no free SGPRs, and we are in the process of spilling
472       // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
473       // on SI/CI, and on VI it is true until we implement spilling using scalar
474       // stores), we have no way to free up an SGPR. Our solution here is to
475       // add the offset directly to the ScratchOffset register, and then
476       // subtract the offset after the spill to return ScratchOffset to its
477       // original value.
478  RanOutOfSGPRs = true;
479  SOffset = ScratchOffsetReg;
480  } else {
481  Scavenged = true;
482  }
483 
484  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
485  .addReg(ScratchOffsetReg)
486  .addImm(Offset);
487 
488  Offset = 0;
489  }
490 
491  const unsigned EltSize = 4;
492 
493  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
494  unsigned SubReg = NumSubRegs == 1 ?
495  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
496 
497  unsigned SOffsetRegState = 0;
498  unsigned SrcDstRegState = getDefRegState(!IsStore);
499  if (i + 1 == e) {
500  SOffsetRegState |= getKillRegState(Scavenged);
501  // The last implicit use carries the "Kill" flag.
502  SrcDstRegState |= getKillRegState(IsKill);
503  }
504 
505  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
506  MachineMemOperand *NewMMO
507  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
508  EltSize, MinAlign(Align, EltSize * i));
509 
510  auto MIB = BuildMI(*MBB, MI, DL, Desc)
511  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
512  .addReg(ScratchRsrcReg)
513  .addReg(SOffset, SOffsetRegState)
514  .addImm(Offset)
515  .addImm(0) // glc
516  .addImm(0) // slc
517  .addImm(0) // tfe
518  .addMemOperand(NewMMO);
519 
520  if (NumSubRegs > 1)
521  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
522  }
523 
524  if (RanOutOfSGPRs) {
525  // Subtract the offset we added to the ScratchOffset register.
526  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
527  .addReg(ScratchOffsetReg)
528  .addImm(OriginalImmOffset);
529  }
530 }
531 
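// Pick the widest scalar buffer load/store whose element size evenly divides
// the super-register size; returns {element size in bytes, opcode}.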
532 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
533  bool Store) {
534  if (SuperRegSize % 16 == 0) {
535  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
536  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
537  }
538 
539  if (SuperRegSize % 8 == 0) {
540  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
541  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
542  }
543 
544  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
545  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
546 }
547 
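// Spill an SGPR (or SGPR tuple): each 32-bit piece is either written into a
// reserved VGPR lane, stored to scratch with scalar stores when spilling to
// SMEM is enabled, or bounced through a temporary VGPR into a stack slot.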
548 void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
549   int Index,
550  RegScavenger *RS) const {
551  MachineBasicBlock *MBB = MI->getParent();
552  MachineFunction *MF = MBB->getParent();
553   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
554   const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
555  const SIInstrInfo *TII = ST.getInstrInfo();
556 
557  unsigned SuperReg = MI->getOperand(0).getReg();
558  bool IsKill = MI->getOperand(0).isKill();
559  const DebugLoc &DL = MI->getDebugLoc();
560 
561   MachineRegisterInfo &MRI = MF->getRegInfo();
562   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
563 
564  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
565 
566  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
567 
568  unsigned OffsetReg = AMDGPU::M0;
569  unsigned M0CopyReg = AMDGPU::NoRegister;
570 
571  if (SpillToSMEM) {
572  if (RS->isRegUsed(AMDGPU::M0)) {
573  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
574  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
575  .addReg(AMDGPU::M0);
576  }
577  }
578 
579  unsigned ScalarStoreOp;
580  unsigned EltSize = 4;
581  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
582  if (SpillToSMEM && isSGPRClass(RC)) {
583  // XXX - if private_element_size is larger than 4 it might be useful to be
584  // able to spill wider vmem spills.
585  std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true);
586  }
587 
588  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
589  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
590 
591  // SubReg carries the "Kill" flag when SubReg == SuperReg.
592  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
593  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
594  unsigned SubReg = NumSubRegs == 1 ?
595  SuperReg : getSubReg(SuperReg, SplitParts[i]);
596 
597  if (SpillToSMEM) {
598  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
599 
600  // The allocated memory size is really the wavefront size * the frame
601  // index size. The widest register class is 64 bytes, so a 4-byte scratch
602  // allocation is enough to spill this in a single stack object.
603  //
604  // FIXME: Frame size/offsets are computed earlier than this, so the extra
605  // space is still unnecessarily allocated.
606 
607  unsigned Align = FrameInfo.getObjectAlignment(Index);
608  MachinePointerInfo PtrInfo
609  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
610  MachineMemOperand *MMO
611       = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
612         EltSize, MinAlign(Align, EltSize * i));
613 
614  // SMEM instructions only support a single offset, so increment the wave
615  // offset.
616 
617  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
618  if (Offset != 0) {
619  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
620  .addReg(MFI->getScratchWaveOffsetReg())
621  .addImm(Offset);
622  } else {
623  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
624  .addReg(MFI->getScratchWaveOffsetReg());
625  }
626 
627  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
628  .addReg(SubReg, getKillRegState(IsKill)) // sdata
629  .addReg(MFI->getScratchRSrcReg()) // sbase
630  .addReg(OffsetReg, RegState::Kill) // soff
631  .addImm(0) // glc
632  .addMemOperand(MMO);
633 
634  continue;
635  }
636 
637  struct SIMachineFunctionInfo::SpilledReg Spill =
638  MFI->getSpilledReg(MF, Index, i);
639  if (Spill.hasReg()) {
640  BuildMI(*MBB, MI, DL,
641  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
642  Spill.VGPR)
643  .addReg(SubReg, getKillRegState(IsKill))
644  .addImm(Spill.Lane);
645 
646  // FIXME: Since this spills to another register instead of an actual
647  // frame index, we should delete the frame index when all references to
648  // it are fixed.
649  } else {
650  // Spill SGPR to a frame index.
651  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
652  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
654 
655       MachineInstrBuilder Mov
656         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
657  .addReg(SubReg, SubKillState);
658 
659 
660  // There could be undef components of a spilled super register.
661  // TODO: Can we detect this and skip the spill?
662  if (NumSubRegs > 1) {
663  // The last implicit use of the SuperReg carries the "Kill" flag.
664  unsigned SuperKillState = 0;
665  if (i + 1 == e)
666  SuperKillState |= getKillRegState(IsKill);
667  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
668  }
669 
670  unsigned Align = FrameInfo.getObjectAlignment(Index);
671  MachinePointerInfo PtrInfo
672  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
673  MachineMemOperand *MMO
674       = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
675         EltSize, MinAlign(Align, EltSize * i));
676  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
677  .addReg(TmpReg, RegState::Kill) // src
678  .addFrameIndex(Index) // vaddr
679       .addReg(MFI->getScratchRSrcReg()) // srsrc
680  .addReg(MFI->getScratchWaveOffsetReg()) // soffset
681  .addImm(i * 4) // offset
682  .addMemOperand(MMO);
683  }
684  }
685 
686  if (M0CopyReg != AMDGPU::NoRegister) {
687  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
688  .addReg(M0CopyReg, RegState::Kill);
689  }
690 
691  MI->eraseFromParent();
692  MFI->addToSpilledSGPRs(NumSubRegs);
693 }
694 
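// Reload an SGPR (or SGPR tuple) spilled by spillSGPR, reversing whichever of
// the three spill paths was used.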
695 void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
696   int Index,
697  RegScavenger *RS) const {
698   MachineFunction *MF = MI->getParent()->getParent();
699   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
700   MachineBasicBlock *MBB = MI->getParent();
701   MachineRegisterInfo &MRI = MF->getRegInfo();
702  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
703  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
704  const SIInstrInfo *TII = ST.getInstrInfo();
705  const DebugLoc &DL = MI->getDebugLoc();
706 
707  unsigned SuperReg = MI->getOperand(0).getReg();
708  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
709 
710  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
711 
712  unsigned OffsetReg = AMDGPU::M0;
713  unsigned M0CopyReg = AMDGPU::NoRegister;
714 
715  if (SpillToSMEM) {
716  if (RS->isRegUsed(AMDGPU::M0)) {
717  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
718  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
719  .addReg(AMDGPU::M0);
720  }
721  }
722 
723  unsigned EltSize = 4;
724  unsigned ScalarLoadOp;
725 
726  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
727  if (SpillToSMEM && isSGPRClass(RC)) {
728  // XXX - if private_element_size is larger than 4 it might be useful to be
729  // able to spill wider vmem spills.
730  std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
731  }
732 
733  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
734  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
735 
736  // SubReg carries the "Kill" flag when SubReg == SuperReg.
737  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
738 
739  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
740  unsigned SubReg = NumSubRegs == 1 ?
741  SuperReg : getSubReg(SuperReg, SplitParts[i]);
742 
743  if (SpillToSMEM) {
744  // FIXME: Size may be > 4 but extra bytes wasted.
745  unsigned Align = FrameInfo.getObjectAlignment(Index);
746  MachinePointerInfo PtrInfo
747  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
748  MachineMemOperand *MMO
749       = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
750         EltSize, MinAlign(Align, EltSize * i));
751 
752  // Add i * 4 offset
753  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
754  if (Offset != 0) {
755  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
756  .addReg(MFI->getScratchWaveOffsetReg())
757  .addImm(Offset);
758  } else {
759  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
760  .addReg(MFI->getScratchWaveOffsetReg());
761  }
762 
763  auto MIB =
764  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
765  .addReg(MFI->getScratchRSrcReg()) // sbase
766  .addReg(OffsetReg, RegState::Kill) // soff
767  .addImm(0) // glc
768  .addMemOperand(MMO);
769 
770  if (NumSubRegs > 1)
771  MIB.addReg(SuperReg, RegState::ImplicitDefine);
772 
773  continue;
774  }
775 
776     SIMachineFunctionInfo::SpilledReg Spill
777       = MFI->getSpilledReg(MF, Index, i);
778 
779  if (Spill.hasReg()) {
780  auto MIB =
781  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
782  SubReg)
783  .addReg(Spill.VGPR)
784  .addImm(Spill.Lane);
785 
786  if (NumSubRegs > 1)
787  MIB.addReg(SuperReg, RegState::ImplicitDefine);
788  } else {
789  // Restore SGPR from a stack slot.
790  // FIXME: We should use S_LOAD_DWORD here for VI.
791  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
792  unsigned Align = FrameInfo.getObjectAlignment(Index);
793 
794  MachinePointerInfo PtrInfo
795  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
796 
797  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
798  MachineMemOperand::MOLoad, EltSize,
799  MinAlign(Align, EltSize * i));
800 
801  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
802  .addFrameIndex(Index) // vaddr
803  .addReg(MFI->getScratchRSrcReg()) // srsrc
804  .addReg(MFI->getScratchWaveOffsetReg()) // soffset
805  .addImm(i * 4) // offset
806  .addMemOperand(MMO);
807 
808  auto MIB =
809  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
810  .addReg(TmpReg, RegState::Kill);
811 
812  if (NumSubRegs > 1)
813  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
814  }
815  }
816 
817  if (M0CopyReg != AMDGPU::NoRegister) {
818  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
819  .addReg(M0CopyReg, RegState::Kill);
820  }
821 
822  MI->eraseFromParent();
823 }
824 
825 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
826   int SPAdj, unsigned FIOperandNum,
827  RegScavenger *RS) const {
828   MachineFunction *MF = MI->getParent()->getParent();
829   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
830   MachineBasicBlock *MBB = MI->getParent();
831   MachineRegisterInfo &MRI = MF->getRegInfo();
832  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
833  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
834  const SIInstrInfo *TII = ST.getInstrInfo();
835  DebugLoc DL = MI->getDebugLoc();
836 
837  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
838  int Index = MI->getOperand(FIOperandNum).getIndex();
839 
840  switch (MI->getOpcode()) {
841  // SGPR register spill
842  case AMDGPU::SI_SPILL_S512_SAVE:
843  case AMDGPU::SI_SPILL_S256_SAVE:
844  case AMDGPU::SI_SPILL_S128_SAVE:
845  case AMDGPU::SI_SPILL_S64_SAVE:
846  case AMDGPU::SI_SPILL_S32_SAVE: {
847  spillSGPR(MI, Index, RS);
848  break;
849  }
850 
851  // SGPR register restore
852  case AMDGPU::SI_SPILL_S512_RESTORE:
853  case AMDGPU::SI_SPILL_S256_RESTORE:
854  case AMDGPU::SI_SPILL_S128_RESTORE:
855  case AMDGPU::SI_SPILL_S64_RESTORE:
856  case AMDGPU::SI_SPILL_S32_RESTORE: {
857  restoreSGPR(MI, Index, RS);
858  break;
859  }
860 
861  // VGPR register spill
862  case AMDGPU::SI_SPILL_V512_SAVE:
863  case AMDGPU::SI_SPILL_V256_SAVE:
864  case AMDGPU::SI_SPILL_V128_SAVE:
865  case AMDGPU::SI_SPILL_V96_SAVE:
866  case AMDGPU::SI_SPILL_V64_SAVE:
867  case AMDGPU::SI_SPILL_V32_SAVE: {
868  const MachineOperand *VData = TII->getNamedOperand(*MI,
869  AMDGPU::OpName::vdata);
870  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
871  Index,
872  VData->getReg(), VData->isKill(),
873  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
874  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
875  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
876  *MI->memoperands_begin(),
877  RS);
878  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
879  MI->eraseFromParent();
880  break;
881  }
882  case AMDGPU::SI_SPILL_V32_RESTORE:
883  case AMDGPU::SI_SPILL_V64_RESTORE:
884  case AMDGPU::SI_SPILL_V96_RESTORE:
885  case AMDGPU::SI_SPILL_V128_RESTORE:
886  case AMDGPU::SI_SPILL_V256_RESTORE:
887  case AMDGPU::SI_SPILL_V512_RESTORE: {
888  const MachineOperand *VData = TII->getNamedOperand(*MI,
889  AMDGPU::OpName::vdata);
890 
891  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
892  Index,
893  VData->getReg(), VData->isKill(),
894  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
895  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
896  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
897  *MI->memoperands_begin(),
898  RS);
899  MI->eraseFromParent();
900  break;
901  }
902 
903  default: {
904  if (TII->isMUBUF(*MI)) {
905  // Disable offen so we don't need a 0 vgpr base.
906  assert(static_cast<int>(FIOperandNum) ==
907  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
908  AMDGPU::OpName::vaddr));
909 
910  int64_t Offset = FrameInfo.getObjectOffset(Index);
911  int64_t OldImm
912  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
913  int64_t NewOffset = OldImm + Offset;
914 
915  if (isUInt<12>(NewOffset) &&
916  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
917  MI->eraseFromParent();
918  break;
919  }
920  }
921 
922  int64_t Offset = FrameInfo.getObjectOffset(Index);
923  FIOp.ChangeToImmediate(Offset);
924  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
925  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
926  BuildMI(*MBB, MI, MI->getDebugLoc(),
927  TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
928  .addImm(Offset);
929  FIOp.ChangeToRegister(TmpReg, false, false, true);
930  }
931  }
932  }
933 }
934 
935 // FIXME: This is very slow. It might be worth creating a map from physreg to
936 // register class.
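// Return the 'base' register class for this physical register.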
937 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
938   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
939 
940  static const TargetRegisterClass *const BaseClasses[] = {
941  &AMDGPU::VGPR_32RegClass,
942  &AMDGPU::SReg_32RegClass,
943  &AMDGPU::VReg_64RegClass,
944  &AMDGPU::SReg_64RegClass,
945  &AMDGPU::VReg_96RegClass,
946  &AMDGPU::VReg_128RegClass,
947  &AMDGPU::SReg_128RegClass,
948  &AMDGPU::VReg_256RegClass,
949  &AMDGPU::SReg_256RegClass,
950  &AMDGPU::VReg_512RegClass,
951  &AMDGPU::SReg_512RegClass,
952  &AMDGPU::SCC_CLASSRegClass,
953  };
954 
955  for (const TargetRegisterClass *BaseClass : BaseClasses) {
956  if (BaseClass->contains(Reg)) {
957  return BaseClass;
958  }
959  }
960  return nullptr;
961 }
962 
963 // TODO: It might be helpful to have some target specific flags in
964 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
965 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
966   switch (RC->getSize()) {
967  case 0: return false;
968  case 1: return false;
969  case 4:
970  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
971  case 8:
972  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
973  case 12:
974  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
975  case 16:
976  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
977  case 32:
978  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
979  case 64:
980  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
981  default:
982  llvm_unreachable("Invalid register class size");
983  }
984 }
985 
986 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
987   const TargetRegisterClass *SRC) const {
988  switch (SRC->getSize()) {
989  case 4:
990  return &AMDGPU::VGPR_32RegClass;
991  case 8:
992  return &AMDGPU::VReg_64RegClass;
993  case 12:
994  return &AMDGPU::VReg_96RegClass;
995  case 16:
996  return &AMDGPU::VReg_128RegClass;
997  case 32:
998  return &AMDGPU::VReg_256RegClass;
999  case 64:
1000  return &AMDGPU::VReg_512RegClass;
1001  default:
1002  llvm_unreachable("Invalid register class size");
1003  }
1004 }
1005 
1006 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1007   const TargetRegisterClass *VRC) const {
1008  switch (VRC->getSize()) {
1009  case 4:
1010  return &AMDGPU::SGPR_32RegClass;
1011  case 8:
1012  return &AMDGPU::SReg_64RegClass;
1013  case 16:
1014  return &AMDGPU::SReg_128RegClass;
1015  case 32:
1016  return &AMDGPU::SReg_256RegClass;
1017  case 64:
1018  return &AMDGPU::SReg_512RegClass;
1019  default:
1020  llvm_unreachable("Invalid register class size");
1021  }
1022 }
1023 
1024 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1025   const TargetRegisterClass *RC, unsigned SubIdx) const {
1026  if (SubIdx == AMDGPU::NoSubRegister)
1027  return RC;
1028 
1029  // We can assume that each lane corresponds to one 32-bit register.
1030  LaneBitmask::Type Mask = getSubRegIndexLaneMask(SubIdx).getAsInteger();
1031  unsigned Count = countPopulation(Mask);
1032  if (isSGPRClass(RC)) {
1033  switch (Count) {
1034  case 1:
1035  return &AMDGPU::SGPR_32RegClass;
1036  case 2:
1037  return &AMDGPU::SReg_64RegClass;
1038  case 4:
1039  return &AMDGPU::SReg_128RegClass;
1040  case 8:
1041  return &AMDGPU::SReg_256RegClass;
1042  case 16: /* fall-through */
1043  default:
1044  llvm_unreachable("Invalid sub-register class size");
1045  }
1046  } else {
1047  switch (Count) {
1048  case 1:
1049  return &AMDGPU::VGPR_32RegClass;
1050  case 2:
1051  return &AMDGPU::VReg_64RegClass;
1052  case 3:
1053  return &AMDGPU::VReg_96RegClass;
1054  case 4:
1055  return &AMDGPU::VReg_128RegClass;
1056  case 8:
1057  return &AMDGPU::VReg_256RegClass;
1058  case 16: /* fall-through */
1059  default:
1060  llvm_unreachable("Invalid sub-register class size");
1061  }
1062  }
1063 }
1064 
1065 bool SIRegisterInfo::shouldRewriteCopySrc(
1066   const TargetRegisterClass *DefRC,
1067  unsigned DefSubReg,
1068  const TargetRegisterClass *SrcRC,
1069  unsigned SrcSubReg) const {
1070  // We want to prefer the smallest register class possible, so we don't want to
1071  // stop and rewrite on anything that looks like a subregister
1072  // extract. Operations mostly don't care about the super register class, so we
1073  // only want to stop on the most basic of copies between the same register
1074  // class.
1075  //
1076  // e.g. if we have something like
1077  // vreg0 = ...
1078  // vreg1 = ...
1079  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
1080  // vreg3 = COPY vreg2, sub0
1081  //
1082  // We want to look through the COPY to find:
1083  // => vreg3 = COPY vreg0
1084 
1085  // Plain copy.
1086  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1087 }
1088 
1089 // FIXME: Most of these are flexible with HSA and we don't need to reserve them
1090 // as input registers if unused. Whether the dispatch ptr is necessary should be
1091 // easy to detect from used intrinsics. Scratch setup is harder to know.
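// Returns the physical register that Value is stored in.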
1092 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
1093   enum PreloadedValue Value) const {
1094 
1095   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1096   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1097  (void)ST;
1098  switch (Value) {
1099   case SIRegisterInfo::WORKGROUP_ID_X:
1100     assert(MFI->hasWorkGroupIDX());
1101     return MFI->WorkGroupIDXSystemSGPR;
1102   case SIRegisterInfo::WORKGROUP_ID_Y:
1103     assert(MFI->hasWorkGroupIDY());
1104     return MFI->WorkGroupIDYSystemSGPR;
1105   case SIRegisterInfo::WORKGROUP_ID_Z:
1106     assert(MFI->hasWorkGroupIDZ());
1107     return MFI->WorkGroupIDZSystemSGPR;
1108   case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
1109     return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
1110   case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
1111     if (ST.isAmdCodeObjectV2(MF)) {
1112       assert(MFI->hasPrivateSegmentBuffer());
1113       return MFI->PrivateSegmentBufferUserSGPR;
1114     }
1115     assert(MFI->hasPrivateMemoryInputPtr());
1116     return MFI->PrivateMemoryPtrUserSGPR;
1117   case SIRegisterInfo::KERNARG_SEGMENT_PTR:
1118     assert(MFI->hasKernargSegmentPtr());
1119     return MFI->KernargSegmentPtrUserSGPR;
1120   case SIRegisterInfo::DISPATCH_ID:
1121     assert(MFI->hasDispatchID());
1122     return MFI->DispatchIDUserSGPR;
1123   case SIRegisterInfo::FLAT_SCRATCH_INIT:
1124     assert(MFI->hasFlatScratchInit());
1125     return MFI->FlatScratchInitUserSGPR;
1126   case SIRegisterInfo::DISPATCH_PTR:
1127     assert(MFI->hasDispatchPtr());
1128     return MFI->DispatchPtrUserSGPR;
1129   case SIRegisterInfo::QUEUE_PTR:
1130     assert(MFI->hasQueuePtr());
1131     return MFI->QueuePtrUserSGPR;
1132   case SIRegisterInfo::WORKITEM_ID_X:
1133     assert(MFI->hasWorkItemIDX());
1134     return AMDGPU::VGPR0;
1135   case SIRegisterInfo::WORKITEM_ID_Y:
1136     assert(MFI->hasWorkItemIDY());
1137     return AMDGPU::VGPR1;
1138   case SIRegisterInfo::WORKITEM_ID_Z:
1139     assert(MFI->hasWorkItemIDZ());
1140     return AMDGPU::VGPR2;
1141  }
1142  llvm_unreachable("unexpected preloaded value type");
1143 }
1144 
1145 /// \brief Returns a register that is not used at any point in the function.
1146 /// If all registers are used, then this function will return
1147 /// AMDGPU::NoRegister.
1148 unsigned
1149 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1150   const TargetRegisterClass *RC,
1151  const MachineFunction &MF) const {
1152 
1153  for (unsigned Reg : *RC)
1154  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1155  return Reg;
1156  return AMDGPU::NoRegister;
1157 }
1158 
1159 unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
1160   if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
1161     return 800;
1162  return 512;
1163 }
1164 
1165 unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
1166   if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
1167     return 102;
1168  return 104;
1169 }
1170 
1171 unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
1172   const SIMachineFunctionInfo &MFI) const {
1173  if (MFI.hasFlatScratchInit()) {
1174     if (ST.isXNACKEnabled())
1175       return 6; // FLAT_SCRATCH, XNACK, VCC (in that order)
1176 
1178  return 4; // FLAT_SCRATCH, VCC (in that order)
1179  }
1180 
1181  if (ST.isXNACKEnabled())
1182  return 4; // XNACK, VCC (in that order)
1183 
1184  return 2; // VCC.
1185 }
1186 
1187 unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
1188   unsigned WavesPerEU) const {
1189   if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
1190     switch (WavesPerEU) {
1191  case 0: return 0;
1192  case 10: return 0;
1193  case 9: return 0;
1194  case 8: return 81;
1195  default: return 97;
1196  }
1197  } else {
1198  switch (WavesPerEU) {
1199  case 0: return 0;
1200  case 10: return 0;
1201  case 9: return 49;
1202  case 8: return 57;
1203  case 7: return 65;
1204  case 6: return 73;
1205  case 5: return 81;
1206  default: return 97;
1207  }
1208  }
1209 }
1210 
1211 unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
1212   unsigned WavesPerEU,
1213   bool Addressable) const {
1214   if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
1215     switch (WavesPerEU) {
1216  case 0: return 80;
1217  case 10: return 80;
1218  case 9: return 80;
1219  case 8: return 96;
1220  default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
1221  }
1222  } else {
1223  switch (WavesPerEU) {
1224  case 0: return 48;
1225  case 10: return 48;
1226  case 9: return 56;
1227  case 8: return 64;
1228  case 7: return 72;
1229  case 6: return 80;
1230  case 5: return 96;
1231  default: return getNumAddressableSGPRs(ST);
1232  }
1233  }
1234 }
1235 
1236 unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
1237   const Function &F = *MF.getFunction();
1238 
1239  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1240   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1241 
1242  // Compute maximum number of SGPRs function can use using default/requested
1243  // minimum number of waves per execution unit.
1244  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
1245  unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
1246  unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
1247 
1248  // Check if maximum number of SGPRs was explicitly requested using
1249  // "amdgpu-num-sgpr" attribute.
1250  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
1251  unsigned Requested = AMDGPU::getIntegerAttribute(
1252  F, "amdgpu-num-sgpr", MaxNumSGPRs);
1253 
1254  // Make sure requested value does not violate subtarget's specifications.
1255  if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI)))
1256  Requested = 0;
1257 
1258  // If more SGPRs are required to support the input user/system SGPRs,
1259  // increase to accommodate them.
1260  //
1261  // FIXME: This really ends up using the requested number of SGPRs + number
1262  // of reserved special registers in total. Theoretically you could re-use
1263  // the last input registers for these special registers, but this would
1264  // require a lot of complexity to deal with the weird aliasing.
1265  unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
1266  if (Requested && Requested < NumInputSGPRs)
1267  Requested = NumInputSGPRs;
1268 
1269  // Make sure requested value is compatible with values implied by
1270  // default/requested minimum/maximum number of waves per execution unit.
1271  if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
1272  Requested = 0;
1273  if (WavesPerEU.second &&
1274  Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
1275  Requested = 0;
1276 
1277  if (Requested)
1278  MaxNumSGPRs = Requested;
1279  }
1280 
1281  if (ST.hasSGPRInitBug())
1282     MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
1283 
1284  return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
1285  MaxNumAddressableSGPRs);
1286 }
1287 
1288 unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
1289   const SISubtarget &ST) const {
1290  if (ST.debuggerReserveRegs())
1291  return 4;
1292  return 0;
1293 }
1294 
1295 unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
1296  switch (WavesPerEU) {
1297  case 0: return 0;
1298  case 10: return 0;
1299  case 9: return 25;
1300  case 8: return 29;
1301  case 7: return 33;
1302  case 6: return 37;
1303  case 5: return 41;
1304  case 4: return 49;
1305  case 3: return 65;
1306  case 2: return 85;
1307  default: return 129;
1308  }
1309 }
1310 
1311 unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
1312  switch (WavesPerEU) {
1313  case 0: return 24;
1314  case 10: return 24;
1315  case 9: return 28;
1316  case 8: return 32;
1317  case 7: return 36;
1318  case 6: return 40;
1319  case 5: return 48;
1320  case 4: return 64;
1321  case 3: return 84;
1322  case 2: return 128;
1323  default: return getTotalNumVGPRs();
1324  }
1325 }
1326 
1327 unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
1328   const Function &F = *MF.getFunction();
1329 
1330  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1331   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1332 
1333  // Compute maximum number of VGPRs function can use using default/requested
1334  // minimum number of waves per execution unit.
1335  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
1336  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
1337 
1338  // Check if maximum number of VGPRs was explicitly requested using
1339  // "amdgpu-num-vgpr" attribute.
1340  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
1341  unsigned Requested = AMDGPU::getIntegerAttribute(
1342  F, "amdgpu-num-vgpr", MaxNumVGPRs);
1343 
1344  // Make sure requested value does not violate subtarget's specifications.
1345  if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
1346  Requested = 0;
1347 
1348  // Make sure requested value is compatible with values implied by
1349  // default/requested minimum/maximum number of waves per execution unit.
1350  if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
1351  Requested = 0;
1352  if (WavesPerEU.second &&
1353  Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
1354  Requested = 0;
1355 
1356  if (Requested)
1357  MaxNumVGPRs = Requested;
1358  }
1359 
1360  return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
1361 }
1362 
1363 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1364   unsigned EltSize) const {
1365  if (EltSize == 4) {
1366  static const int16_t Sub0_15[] = {
1367  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1368  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1369  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1370  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1371  };
1372 
1373  static const int16_t Sub0_7[] = {
1374  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1375  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1376  };
1377 
1378  static const int16_t Sub0_3[] = {
1379  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1380  };
1381 
1382  static const int16_t Sub0_2[] = {
1383  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1384  };
1385 
1386  static const int16_t Sub0_1[] = {
1387  AMDGPU::sub0, AMDGPU::sub1,
1388  };
1389 
1390  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1391  case 32:
1392  return {};
1393  case 64:
1394  return makeArrayRef(Sub0_1);
1395  case 96:
1396  return makeArrayRef(Sub0_2);
1397  case 128:
1398  return makeArrayRef(Sub0_3);
1399  case 256:
1400  return makeArrayRef(Sub0_7);
1401  case 512:
1402  return makeArrayRef(Sub0_15);
1403  default:
1404  llvm_unreachable("unhandled register size");
1405  }
1406  }
1407 
1408  if (EltSize == 8) {
1409  static const int16_t Sub0_15_64[] = {
1410  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1411  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1412  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1413  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1414  };
1415 
1416  static const int16_t Sub0_7_64[] = {
1417  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1418  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1419  };
1420 
1421 
1422  static const int16_t Sub0_3_64[] = {
1423  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1424  };
1425 
1426  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1427  case 64:
1428  return {};
1429  case 128:
1430  return makeArrayRef(Sub0_3_64);
1431  case 256:
1432  return makeArrayRef(Sub0_7_64);
1433  case 512:
1434  return makeArrayRef(Sub0_15_64);
1435  default:
1436  llvm_unreachable("unhandled register size");
1437  }
1438  }
1439 
1440  assert(EltSize == 16 && "unhandled register spill split size");
1441 
1442  static const int16_t Sub0_15_128[] = {
1443  AMDGPU::sub0_sub1_sub2_sub3,
1444  AMDGPU::sub4_sub5_sub6_sub7,
1445  AMDGPU::sub8_sub9_sub10_sub11,
1446  AMDGPU::sub12_sub13_sub14_sub15
1447  };
1448 
1449  static const int16_t Sub0_7_128[] = {
1450  AMDGPU::sub0_sub1_sub2_sub3,
1451  AMDGPU::sub4_sub5_sub6_sub7
1452  };
1453 
1454  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1455  case 128:
1456  return {};
1457  case 256:
1458  return makeArrayRef(Sub0_7_128);
1459  case 512:
1460  return makeArrayRef(Sub0_15_128);
1461  default:
1462  llvm_unreachable("unhandled register size");
1463  }
1464 }
1465 
1466 const TargetRegisterClass*
1467 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1468   unsigned Reg) const {
1469   if (TargetRegisterInfo::isVirtualRegister(Reg))
1470     return MRI.getRegClass(Reg);
1471 
1472  return getPhysRegClass(Reg);
1473 }
1474 
1475 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1476   unsigned Reg) const {
1477  return hasVGPRs(getRegClassForReg(MRI, Reg));
1478 }