SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/RegisterScavenging.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/LLVMContext.h"
24 
25 using namespace llvm;
26 
27 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
28  for (unsigned i = 0; PSets[i] != -1; ++i) {
29  if (PSets[i] == (int)PSetID)
30  return true;
31  }
32  return false;
33 }
34 
35 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
36  BitVector &PressureSets) const {
37  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
38  const int *PSets = getRegUnitPressureSets(*U);
39  if (hasPressureSet(PSets, PSetID)) {
40  PressureSets.set(PSetID);
41  break;
42  }
43  }
44 }
45 
46 static cl::opt<bool> EnableSpillSGPRToSMEM(
47  "amdgpu-spill-sgpr-to-smem",
48  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
49  cl::init(false));
50 
51 static cl::opt<bool> EnableSpillSGPRToVGPR(
52  "amdgpu-spill-sgpr-to-vgpr",
53  cl::desc("Enable spilling SGPRs to VGPRs"),
54  cl::ReallyHidden,
55  cl::init(true));
56 
57 SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
58  AMDGPURegisterInfo(),
59  SGPRPressureSets(getNumRegPressureSets()),
60  VGPRPressureSets(getNumRegPressureSets()),
61  SpillSGPRToVGPR(false),
62  SpillSGPRToSMEM(false) {
63  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
64  SpillSGPRToSMEM = true;
65  else if (EnableSpillSGPRToVGPR)
66  SpillSGPRToVGPR = true;
67 
68  unsigned NumRegPressureSets = getNumRegPressureSets();
69 
70  SGPRSetID = NumRegPressureSets;
71  VGPRSetID = NumRegPressureSets;
72 
73  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
74  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
75  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
76  }
77 
78  // Determine the number of reg units for each pressure set.
79  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
80  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
81  const int *PSets = getRegUnitPressureSets(i);
82  for (unsigned j = 0; PSets[j] != -1; ++j) {
83  ++PressureSetRegUnits[PSets[j]];
84  }
85  }
86 
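 // Of the pressure sets flagged above for SGPR0/VGPR0, pick the one covering
 // the most register units as the canonical SGPR/VGPR pressure set
 // (SGPRSetID/VGPRSetID), which getSGPRPressureSet()/getVGPRPressureSet()
 // return.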
87  unsigned VGPRMax = 0, SGPRMax = 0;
88  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
89  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
90  VGPRSetID = i;
91  VGPRMax = PressureSetRegUnits[i];
92  continue;
93  }
94  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
95  SGPRSetID = i;
96  SGPRMax = PressureSetRegUnits[i];
97  }
98  }
99 
100  assert(SGPRSetID < NumRegPressureSets &&
101  VGPRSetID < NumRegPressureSets);
102 }
103 
104 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
105  const MachineFunction &MF) const {
106 
107  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
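 // The 128-bit scratch resource descriptor must start on a 4-aligned SGPR,
 // so take the highest aligned group of four SGPRs below this function's
 // SGPR limit.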
108  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
109  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
110  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
111 }
112 
113 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
114  unsigned Reg;
115 
116  // Try to place it in a hole after PrivateSegmentBufferReg.
117  if (RegCount & 3) {
118  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
119  // alignment constraints, so we have a hole where we can put the wave offset.
120  Reg = RegCount - 1;
121  } else {
122  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
123  // wave offset before it.
124  Reg = RegCount - 5;
125  }
126 
127  return Reg;
128 }
129 
130 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
131  const MachineFunction &MF) const {
132  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
133  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
134  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
135 }
136 
137 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
138  const MachineFunction &MF) const {
139  return AMDGPU::SGPR32;
140 }
141 
142 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
143  BitVector Reserved(getNumRegs());
144 
145  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
146  // this seems likely to result in bugs, so I'm marking them as reserved.
147  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
148  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
149 
150  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
151  reserveRegisterTuples(Reserved, AMDGPU::M0);
152 
153  // Reserve the memory aperture registers.
154  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
155  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
156  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
157  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
158 
159  // Reserve xnack_mask registers - support is not implemented in Codegen.
160  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
161 
162  // Reserve Trap Handler registers - support is not implemented in Codegen.
163  reserveRegisterTuples(Reserved, AMDGPU::TBA);
164  reserveRegisterTuples(Reserved, AMDGPU::TMA);
165  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
166  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
167  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
168  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
169  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
170  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
171  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
172  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
173 
174  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
175 
176  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
177  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
178  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
179  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
180  reserveRegisterTuples(Reserved, Reg);
181  }
182 
183  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
184  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
185  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
186  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
187  reserveRegisterTuples(Reserved, Reg);
188  }
189 
190  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
191 
192  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
193  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
194  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
195  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
196  }
197 
198  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
199  if (ScratchRSrcReg != AMDGPU::NoRegister) {
200  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
201  // to spill.
202  // TODO: May need to reserve a VGPR if doing LDS spilling.
203  reserveRegisterTuples(Reserved, ScratchRSrcReg);
204  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
205  }
206 
207  // We have to assume the SP is needed in case there are calls in the function,
208  // which is detected after the function is lowered. If we aren't really going
209  // to need SP, don't bother reserving it.
210  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
211 
212  if (StackPtrReg != AMDGPU::NoRegister) {
213  reserveRegisterTuples(Reserved, StackPtrReg);
214  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
215  }
216 
217  unsigned FrameReg = MFI->getFrameOffsetReg();
218  if (FrameReg != AMDGPU::NoRegister) {
219  reserveRegisterTuples(Reserved, FrameReg);
220  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
221  }
222 
223  return Reserved;
224 }
225 
226 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
227  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
228  if (Info->isEntryFunction()) {
229  const MachineFrameInfo &MFI = Fn.getFrameInfo();
230  return MFI.hasStackObjects() || MFI.hasCalls();
231  }
232 
233  // May need scavenger for dealing with callee saved registers.
234  return true;
235 }
236 
237 bool SIRegisterInfo::requiresFrameIndexScavenging(
238  const MachineFunction &MF) const {
239  const MachineFrameInfo &MFI = MF.getFrameInfo();
240  if (MFI.hasStackObjects())
241  return true;
242 
243  // May need to deal with callee saved registers.
244  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
245  return !Info->isEntryFunction();
246 }
247 
248 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
249  const MachineFunction &MF) const {
250  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
251  // create a virtual register for it during frame index elimination, so the
252  // scavenger is directly needed.
253  return MF.getFrameInfo().hasStackObjects() &&
254  MF.getSubtarget<SISubtarget>().hasScalarStores() &&
255  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
256 }
257 
258 bool SIRegisterInfo::requiresVirtualBaseRegisters(
259  const MachineFunction &) const {
260  // There are no special dedicated stack or frame pointers.
261  return true;
262 }
263 
264 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
265  // This helps catch bugs as verifier errors.
266  return true;
267 }
268 
269 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
270  assert(SIInstrInfo::isMUBUF(*MI));
271 
272  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
273  AMDGPU::OpName::offset);
274  return MI->getOperand(OffIdx).getImm();
275 }
276 
277 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
278  int Idx) const {
279  if (!SIInstrInfo::isMUBUF(*MI))
280  return 0;
281 
282  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
283  AMDGPU::OpName::vaddr) &&
284  "Should never see frame index on non-address operand");
285 
286  return getMUBUFInstrOffset(MI);
287 }
288 
289 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
290  if (!MI->mayLoadOrStore())
291  return false;
292 
293  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
294 
295  return !isUInt<12>(FullOffset);
296 }
297 
298 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
299  unsigned BaseReg,
300  int FrameIdx,
301  int64_t Offset) const {
302  MachineBasicBlock::iterator Ins = MBB->begin();
303  DebugLoc DL; // Defaults to "unknown"
304 
305  if (Ins != MBB->end())
306  DL = Ins->getDebugLoc();
307 
308  MachineFunction *MF = MBB->getParent();
309  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
310  const SIInstrInfo *TII = Subtarget.getInstrInfo();
311 
312  if (Offset == 0) {
313  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
314  .addFrameIndex(FrameIdx);
315  return;
316  }
317 
318  MachineRegisterInfo &MRI = MF->getRegInfo();
319  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
320 
321  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
322 
323  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
324  .addImm(Offset);
325  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
326  .addFrameIndex(FrameIdx);
327 
328  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
329  .addReg(OffsetReg, RegState::Kill)
330  .addReg(FIReg);
331 }
332 
333 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
334  int64_t Offset) const {
335 
336  MachineBasicBlock *MBB = MI.getParent();
337  MachineFunction *MF = MBB->getParent();
338  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
339  const SIInstrInfo *TII = Subtarget.getInstrInfo();
340 
341 #ifndef NDEBUG
342  // FIXME: Is it possible to be storing a frame index to itself?
343  bool SeenFI = false;
344  for (const MachineOperand &MO: MI.operands()) {
345  if (MO.isFI()) {
346  if (SeenFI)
347  llvm_unreachable("should not see multiple frame indices");
348 
349  SeenFI = true;
350  }
351  }
352 #endif
353 
354  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
355  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
356  assert(TII->isMUBUF(MI));
357  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
358  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
359  "should only be seeing frame offset relative FrameIndex");
360 
361 
362  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
363  int64_t NewOffset = OffsetOp->getImm() + Offset;
364  assert(isUInt<12>(NewOffset) && "offset should be legal");
365 
366  FIOp->ChangeToRegister(BaseReg, false);
367  OffsetOp->setImm(NewOffset);
368 }
369 
370 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
371  unsigned BaseReg,
372  int64_t Offset) const {
373  if (!SIInstrInfo::isMUBUF(*MI))
374  return false;
375 
376  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
377 
378  return isUInt<12>(NewOffset);
379 }
380 
381 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
382  const MachineFunction &MF, unsigned Kind) const {
383  // This is inaccurate. It depends on the instruction and address space. The
384  // only place where we should hit this is for dealing with frame indexes /
385  // private accesses, so this is correct in that case.
386  return &AMDGPU::VGPR_32RegClass;
387 }
388 
389 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
390 
391  switch (Op) {
392  case AMDGPU::SI_SPILL_S512_SAVE:
393  case AMDGPU::SI_SPILL_S512_RESTORE:
394  case AMDGPU::SI_SPILL_V512_SAVE:
395  case AMDGPU::SI_SPILL_V512_RESTORE:
396  return 16;
397  case AMDGPU::SI_SPILL_S256_SAVE:
398  case AMDGPU::SI_SPILL_S256_RESTORE:
399  case AMDGPU::SI_SPILL_V256_SAVE:
400  case AMDGPU::SI_SPILL_V256_RESTORE:
401  return 8;
402  case AMDGPU::SI_SPILL_S128_SAVE:
403  case AMDGPU::SI_SPILL_S128_RESTORE:
404  case AMDGPU::SI_SPILL_V128_SAVE:
405  case AMDGPU::SI_SPILL_V128_RESTORE:
406  return 4;
407  case AMDGPU::SI_SPILL_V96_SAVE:
408  case AMDGPU::SI_SPILL_V96_RESTORE:
409  return 3;
410  case AMDGPU::SI_SPILL_S64_SAVE:
411  case AMDGPU::SI_SPILL_S64_RESTORE:
412  case AMDGPU::SI_SPILL_V64_SAVE:
413  case AMDGPU::SI_SPILL_V64_RESTORE:
414  return 2;
415  case AMDGPU::SI_SPILL_S32_SAVE:
416  case AMDGPU::SI_SPILL_S32_RESTORE:
417  case AMDGPU::SI_SPILL_V32_SAVE:
418  case AMDGPU::SI_SPILL_V32_RESTORE:
419  return 1;
420  default: llvm_unreachable("Invalid spill opcode");
421  }
422 }
423 
424 static int getOffsetMUBUFStore(unsigned Opc) {
425  switch (Opc) {
426  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
427  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
428  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
429  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
430  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
431  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
432  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
433  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
434  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
435  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
436  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
437  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
438  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
439  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
440  default:
441  return -1;
442  }
443 }
444 
445 static int getOffsetMUBUFLoad(unsigned Opc) {
446  switch (Opc) {
447  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
448  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
449  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
450  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
451  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
452  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
453  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
454  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
455  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
456  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
457  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
458  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
459  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
460  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
461  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
462  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
463  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
464  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
465  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
466  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
467  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
468  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
469  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
470  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
471  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
472  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
473  default:
474  return -1;
475  }
476 }
477 
478 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
479 // need to handle the case where an SGPR may need to be spilled while spilling.
480 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
481  MachineFrameInfo &MFI,
482  MachineBasicBlock::iterator MI,
483  int Index,
484  int64_t Offset) {
485  MachineBasicBlock *MBB = MI->getParent();
486  const DebugLoc &DL = MI->getDebugLoc();
487  bool IsStore = MI->mayStore();
488 
489  unsigned Opc = MI->getOpcode();
490  int LoadStoreOp = IsStore ?
491  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
492  if (LoadStoreOp == -1)
493  return false;
494 
495  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
496  MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
497  .add(*Reg)
498  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
499  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
500  .addImm(Offset)
501  .addImm(0) // glc
502  .addImm(0) // slc
503  .addImm(0) // tfe
504  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
505 
506  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
507  AMDGPU::OpName::vdata_in);
508  if (VDataIn)
509  NewMI.add(*VDataIn);
510  return true;
511 }
512 
513 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
514  unsigned LoadStoreOp,
515  int Index,
516  unsigned ValueReg,
517  bool IsKill,
518  unsigned ScratchRsrcReg,
519  unsigned ScratchOffsetReg,
520  int64_t InstOffset,
521  MachineMemOperand *MMO,
522  RegScavenger *RS) const {
523  MachineBasicBlock *MBB = MI->getParent();
524  MachineFunction *MF = MI->getParent()->getParent();
525  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
526  const SIInstrInfo *TII = ST.getInstrInfo();
527  const MachineFrameInfo &MFI = MF->getFrameInfo();
528 
529  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
530  const DebugLoc &DL = MI->getDebugLoc();
531  bool IsStore = Desc.mayStore();
532 
533  bool RanOutOfSGPRs = false;
534  bool Scavenged = false;
535  unsigned SOffset = ScratchOffsetReg;
536 
537  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
538  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
539  unsigned Size = NumSubRegs * 4;
540  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
541  const int64_t OriginalImmOffset = Offset;
542 
543  unsigned Align = MFI.getObjectAlignment(Index);
544  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
545 
546  if (!isUInt<12>(Offset + Size)) {
547  SOffset = AMDGPU::NoRegister;
548 
549  // We don't have access to the register scavenger if this function is called
550  // during PEI::scavengeFrameVirtualRegs().
551  if (RS)
552  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
553 
554  if (SOffset == AMDGPU::NoRegister) {
555  // There are no free SGPRs, and we are in the process of spilling
556  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
557  // on SI/CI, and on VI it is true until we implement spilling using scalar
558  // stores), we have no way to free up an SGPR. Our solution here is to
559  // add the offset directly to the ScratchOffset register, and then
560  // subtract the offset after the spill to return ScratchOffset to its
561  // original value.
562  RanOutOfSGPRs = true;
563  SOffset = ScratchOffsetReg;
564  } else {
565  Scavenged = true;
566  }
567 
568  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
569  .addReg(ScratchOffsetReg)
570  .addImm(Offset);
571 
572  Offset = 0;
573  }
574 
575  const unsigned EltSize = 4;
576 
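 // Emit one 32-bit buffer load/store per dword subregister of ValueReg,
 // advancing the immediate offset by EltSize bytes each iteration.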
577  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
578  unsigned SubReg = NumSubRegs == 1 ?
579  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
580 
581  unsigned SOffsetRegState = 0;
582  unsigned SrcDstRegState = getDefRegState(!IsStore);
583  if (i + 1 == e) {
584  SOffsetRegState |= getKillRegState(Scavenged);
585  // The last implicit use carries the "Kill" flag.
586  SrcDstRegState |= getKillRegState(IsKill);
587  }
588 
589  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
590  MachineMemOperand *NewMMO
591  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
592  EltSize, MinAlign(Align, EltSize * i));
593 
594  auto MIB = BuildMI(*MBB, MI, DL, Desc)
595  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
596  .addReg(ScratchRsrcReg)
597  .addReg(SOffset, SOffsetRegState)
598  .addImm(Offset)
599  .addImm(0) // glc
600  .addImm(0) // slc
601  .addImm(0) // tfe
602  .addMemOperand(NewMMO);
603 
604  if (NumSubRegs > 1)
605  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
606  }
607 
608  if (RanOutOfSGPRs) {
609  // Subtract the offset we added to the ScratchOffset register.
610  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
611  .addReg(ScratchOffsetReg)
612  .addImm(OriginalImmOffset);
613  }
614 }
615 
616 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
617  bool Store) {
618  if (SuperRegSize % 16 == 0) {
619  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
620  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
621  }
622 
623  if (SuperRegSize % 8 == 0) {
624  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
625  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
626  }
627 
628  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
629  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
630 }
631 
632 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
633  int Index,
634  RegScavenger *RS,
635  bool OnlyToVGPR) const {
636  MachineBasicBlock *MBB = MI->getParent();
637  MachineFunction *MF = MBB->getParent();
638  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
639  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
640 
641  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
642  = MFI->getSGPRToVGPRSpills(Index);
643  bool SpillToVGPR = !VGPRSpills.empty();
644  if (OnlyToVGPR && !SpillToVGPR)
645  return false;
646 
647  MachineRegisterInfo &MRI = MF->getRegInfo();
648  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
649  const SIInstrInfo *TII = ST.getInstrInfo();
650 
651  unsigned SuperReg = MI->getOperand(0).getReg();
652  bool IsKill = MI->getOperand(0).isKill();
653  const DebugLoc &DL = MI->getDebugLoc();
654 
655  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
656 
657  bool SpillToSMEM = spillSGPRToSMEM();
658  if (SpillToSMEM && OnlyToVGPR)
659  return false;
660 
661  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
662 
663  unsigned OffsetReg = AMDGPU::M0;
664  unsigned M0CopyReg = AMDGPU::NoRegister;
665 
666  if (SpillToSMEM) {
667  if (RS->isRegUsed(AMDGPU::M0)) {
668  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
669  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
670  .addReg(AMDGPU::M0);
671  }
672  }
673 
674  unsigned ScalarStoreOp;
675  unsigned EltSize = 4;
676  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
677  if (SpillToSMEM && isSGPRClass(RC)) {
678  // XXX - if private_element_size is larger than 4 it might be useful to be
679  // able to spill wider vmem spills.
680  std::tie(EltSize, ScalarStoreOp) =
681  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
682  }
683 
684  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
685  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
686 
687  // SubReg carries the "Kill" flag when SubReg == SuperReg.
688  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
689  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
690  unsigned SubReg = NumSubRegs == 1 ?
691  SuperReg : getSubReg(SuperReg, SplitParts[i]);
692 
693  if (SpillToSMEM) {
694  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
695 
696  // The allocated memory size is really the wavefront size * the frame
697  // index size. The widest register class is 64 bytes, so a 4-byte scratch
698  // allocation is enough to spill this in a single stack object.
699  //
700  // FIXME: Frame size/offsets are computed earlier than this, so the extra
701  // space is still unnecessarily allocated.
702 
703  unsigned Align = FrameInfo.getObjectAlignment(Index);
704  MachinePointerInfo PtrInfo
705  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
706  MachineMemOperand *MMO
707  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
708  EltSize, MinAlign(Align, EltSize * i));
709 
710  // SMEM instructions only support a single offset, so increment the wave
711  // offset.
712 
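 // Frame offsets are per-lane values, while the backing scratch memory is
 // laid out with wavefront_size bytes per frame byte, so scale FrOffset by
 // the wavefront size before adding the per-element offset.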
713  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
714  if (Offset != 0) {
715  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
716  .addReg(MFI->getFrameOffsetReg())
717  .addImm(Offset);
718  } else {
719  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
720  .addReg(MFI->getFrameOffsetReg());
721  }
722 
723  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
724  .addReg(SubReg, getKillRegState(IsKill)) // sdata
725  .addReg(MFI->getScratchRSrcReg()) // sbase
726  .addReg(OffsetReg, RegState::Kill) // soff
727  .addImm(0) // glc
728  .addMemOperand(MMO);
729 
730  continue;
731  }
732 
733  if (SpillToVGPR) {
734  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
735 
736  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
737  // only circumstance in which we say it is undefined is when it is the
738  // first spill to this VGPR in the first basic block.
739  bool VGPRDefined = true;
740  if (MBB == &MF->front())
741  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
742 
743  // Mark the "old value of vgpr" input undef only if this is the first sgpr
744  // spill to this specific vgpr in the first basic block.
745  BuildMI(*MBB, MI, DL,
746  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
747  Spill.VGPR)
748  .addReg(SubReg, getKillRegState(IsKill))
749  .addImm(Spill.Lane)
750  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
751 
752  // FIXME: Since this spills to another register instead of an actual
753  // frame index, we should delete the frame index when all references to
754  // it are fixed.
755  } else {
756  // XXX - Can the spill to VGPR fail for some subregisters but not others?
757  if (OnlyToVGPR)
758  return false;
759 
760  // Spill SGPR to a frame index.
761  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
762  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
763
764 
765  MachineInstrBuilder Mov
766  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
767  .addReg(SubReg, SubKillState);
768 
769 
770  // There could be undef components of a spilled super register.
771  // TODO: Can we detect this and skip the spill?
772  if (NumSubRegs > 1) {
773  // The last implicit use of the SuperReg carries the "Kill" flag.
774  unsigned SuperKillState = 0;
775  if (i + 1 == e)
776  SuperKillState |= getKillRegState(IsKill);
777  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
778  }
779 
780  unsigned Align = FrameInfo.getObjectAlignment(Index);
781  MachinePointerInfo PtrInfo
782  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
783  MachineMemOperand *MMO
784  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
785  EltSize, MinAlign(Align, EltSize * i));
786  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
787  .addReg(TmpReg, RegState::Kill) // src
788  .addFrameIndex(Index) // vaddr
789  .addReg(MFI->getScratchRSrcReg()) // srsrc
790  .addReg(MFI->getFrameOffsetReg()) // soffset
791  .addImm(i * 4) // offset
792  .addMemOperand(MMO);
793  }
794  }
795 
796  if (M0CopyReg != AMDGPU::NoRegister) {
797  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
798  .addReg(M0CopyReg, RegState::Kill);
799  }
800 
801  MI->eraseFromParent();
802  MFI->addToSpilledSGPRs(NumSubRegs);
803  return true;
804 }
805 
806 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
807  int Index,
808  RegScavenger *RS,
809  bool OnlyToVGPR) const {
810  MachineFunction *MF = MI->getParent()->getParent();
811  MachineRegisterInfo &MRI = MF->getRegInfo();
812  MachineBasicBlock *MBB = MI->getParent();
813  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
814 
815  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
816  = MFI->getSGPRToVGPRSpills(Index);
817  bool SpillToVGPR = !VGPRSpills.empty();
818  if (OnlyToVGPR && !SpillToVGPR)
819  return false;
820 
821  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
822  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
823  const SIInstrInfo *TII = ST.getInstrInfo();
824  const DebugLoc &DL = MI->getDebugLoc();
825 
826  unsigned SuperReg = MI->getOperand(0).getReg();
827  bool SpillToSMEM = spillSGPRToSMEM();
828  if (SpillToSMEM && OnlyToVGPR)
829  return false;
830 
831  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
832 
833  unsigned OffsetReg = AMDGPU::M0;
834  unsigned M0CopyReg = AMDGPU::NoRegister;
835 
836  if (SpillToSMEM) {
837  if (RS->isRegUsed(AMDGPU::M0)) {
838  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
839  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
840  .addReg(AMDGPU::M0);
841  }
842  }
843 
844  unsigned EltSize = 4;
845  unsigned ScalarLoadOp;
846 
847  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
848  if (SpillToSMEM && isSGPRClass(RC)) {
849  // XXX - if private_element_size is larger than 4 it might be useful to be
850  // able to spill wider vmem spills.
851  std::tie(EltSize, ScalarLoadOp) =
852  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
853  }
854 
855  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
856  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
857 
858  // SubReg carries the "Kill" flag when SubReg == SuperReg.
859  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
860 
861  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
862  unsigned SubReg = NumSubRegs == 1 ?
863  SuperReg : getSubReg(SuperReg, SplitParts[i]);
864 
865  if (SpillToSMEM) {
866  // FIXME: Size may be > 4 but extra bytes wasted.
867  unsigned Align = FrameInfo.getObjectAlignment(Index);
868  MachinePointerInfo PtrInfo
869  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
870  MachineMemOperand *MMO
871  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
872  EltSize, MinAlign(Align, EltSize * i));
873 
874  // Compute the offset as in the spill path: wavefront size * frame offset plus EltSize * i.
875  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
876  if (Offset != 0) {
877  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
878  .addReg(MFI->getFrameOffsetReg())
879  .addImm(Offset);
880  } else {
881  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
882  .addReg(MFI->getFrameOffsetReg());
883  }
884 
885  auto MIB =
886  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
887  .addReg(MFI->getScratchRSrcReg()) // sbase
888  .addReg(OffsetReg, RegState::Kill) // soff
889  .addImm(0) // glc
890  .addMemOperand(MMO);
891 
892  if (NumSubRegs > 1)
893  MIB.addReg(SuperReg, RegState::ImplicitDefine);
894 
895  continue;
896  }
897 
898  if (SpillToVGPR) {
899  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
900  auto MIB =
901  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
902  SubReg)
903  .addReg(Spill.VGPR)
904  .addImm(Spill.Lane);
905 
906  if (NumSubRegs > 1)
907  MIB.addReg(SuperReg, RegState::ImplicitDefine);
908  } else {
909  if (OnlyToVGPR)
910  return false;
911 
912  // Restore SGPR from a stack slot.
913  // FIXME: We should use S_LOAD_DWORD here for VI.
914  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
915  unsigned Align = FrameInfo.getObjectAlignment(Index);
916 
917  MachinePointerInfo PtrInfo
918  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
919 
920  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
921  MachineMemOperand::MOLoad, EltSize,
922  MinAlign(Align, EltSize * i));
923 
924  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
925  .addFrameIndex(Index) // vaddr
926  .addReg(MFI->getScratchRSrcReg()) // srsrc
927  .addReg(MFI->getFrameOffsetReg()) // soffset
928  .addImm(i * 4) // offset
929  .addMemOperand(MMO);
930 
931  auto MIB =
932  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
933  .addReg(TmpReg, RegState::Kill);
934 
935  if (NumSubRegs > 1)
936  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
937  }
938  }
939 
940  if (M0CopyReg != AMDGPU::NoRegister) {
941  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
942  .addReg(M0CopyReg, RegState::Kill);
943  }
944 
945  MI->eraseFromParent();
946  return true;
947 }
948 
949 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
950 /// a VGPR and the stack slot can be safely eliminated when all other users are
951 /// handled.
952 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
953  MachineBasicBlock::iterator MI,
954  int FI,
955  RegScavenger *RS) const {
956  switch (MI->getOpcode()) {
957  case AMDGPU::SI_SPILL_S512_SAVE:
958  case AMDGPU::SI_SPILL_S256_SAVE:
959  case AMDGPU::SI_SPILL_S128_SAVE:
960  case AMDGPU::SI_SPILL_S64_SAVE:
961  case AMDGPU::SI_SPILL_S32_SAVE:
962  return spillSGPR(MI, FI, RS, true);
963  case AMDGPU::SI_SPILL_S512_RESTORE:
964  case AMDGPU::SI_SPILL_S256_RESTORE:
965  case AMDGPU::SI_SPILL_S128_RESTORE:
966  case AMDGPU::SI_SPILL_S64_RESTORE:
967  case AMDGPU::SI_SPILL_S32_RESTORE:
968  return restoreSGPR(MI, FI, RS, true);
969  default:
970  llvm_unreachable("not an SGPR spill instruction");
971  }
972 }
973 
974 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
975  int SPAdj, unsigned FIOperandNum,
976  RegScavenger *RS) const {
977  MachineFunction *MF = MI->getParent()->getParent();
978  MachineRegisterInfo &MRI = MF->getRegInfo();
979  MachineBasicBlock *MBB = MI->getParent();
980  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
981  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
982  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
983  const SIInstrInfo *TII = ST.getInstrInfo();
984  DebugLoc DL = MI->getDebugLoc();
985 
986  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
987  int Index = MI->getOperand(FIOperandNum).getIndex();
988 
989  switch (MI->getOpcode()) {
990  // SGPR register spill
991  case AMDGPU::SI_SPILL_S512_SAVE:
992  case AMDGPU::SI_SPILL_S256_SAVE:
993  case AMDGPU::SI_SPILL_S128_SAVE:
994  case AMDGPU::SI_SPILL_S64_SAVE:
995  case AMDGPU::SI_SPILL_S32_SAVE: {
996  spillSGPR(MI, Index, RS);
997  break;
998  }
999 
1000  // SGPR register restore
1001  case AMDGPU::SI_SPILL_S512_RESTORE:
1002  case AMDGPU::SI_SPILL_S256_RESTORE:
1003  case AMDGPU::SI_SPILL_S128_RESTORE:
1004  case AMDGPU::SI_SPILL_S64_RESTORE:
1005  case AMDGPU::SI_SPILL_S32_RESTORE: {
1006  restoreSGPR(MI, Index, RS);
1007  break;
1008  }
1009 
1010  // VGPR register spill
1011  case AMDGPU::SI_SPILL_V512_SAVE:
1012  case AMDGPU::SI_SPILL_V256_SAVE:
1013  case AMDGPU::SI_SPILL_V128_SAVE:
1014  case AMDGPU::SI_SPILL_V96_SAVE:
1015  case AMDGPU::SI_SPILL_V64_SAVE:
1016  case AMDGPU::SI_SPILL_V32_SAVE: {
1017  const MachineOperand *VData = TII->getNamedOperand(*MI,
1018  AMDGPU::OpName::vdata);
1019  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1020  Index,
1021  VData->getReg(), VData->isKill(),
1022  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1023  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1024  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1025  *MI->memoperands_begin(),
1026  RS);
1027  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1028  MI->eraseFromParent();
1029  break;
1030  }
1031  case AMDGPU::SI_SPILL_V32_RESTORE:
1032  case AMDGPU::SI_SPILL_V64_RESTORE:
1033  case AMDGPU::SI_SPILL_V96_RESTORE:
1034  case AMDGPU::SI_SPILL_V128_RESTORE:
1035  case AMDGPU::SI_SPILL_V256_RESTORE:
1036  case AMDGPU::SI_SPILL_V512_RESTORE: {
1037  const MachineOperand *VData = TII->getNamedOperand(*MI,
1038  AMDGPU::OpName::vdata);
1039 
1040  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1041  Index,
1042  VData->getReg(), VData->isKill(),
1043  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1044  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1045  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1046  *MI->memoperands_begin(),
1047  RS);
1048  MI->eraseFromParent();
1049  break;
1050  }
1051 
1052  default: {
1053  const DebugLoc &DL = MI->getDebugLoc();
1054  bool IsMUBUF = TII->isMUBUF(*MI);
1055 
1056  if (!IsMUBUF &&
1057  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1058  // Convert to an absolute stack address by finding the offset from the
1059  // scratch wave base and scaling by the wave size.
1060  //
1061  // In an entry function/kernel the stack address is already the
1062  // absolute address relative to the scratch wave offset.
1063 
1064  unsigned DiffReg
1065  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1066 
1067  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1068  unsigned ResultReg = IsCopy ?
1069  MI->getOperand(0).getReg() :
1070  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1071 
1072  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1073  .addReg(MFI->getFrameOffsetReg())
1074  .addReg(MFI->getScratchWaveOffsetReg());
1075 
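 // DiffReg now holds the frame offset in scratch (wave-scaled) bytes; the
 // right shift by log2(wavefront size) below converts it to a per-lane
 // offset before the object offset is added.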
1076  int64_t Offset = FrameInfo.getObjectOffset(Index);
1077  if (Offset == 0) {
1078  // XXX - This never happens because of emergency scavenging slot at 0?
1079  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1080  .addImm(Log2_32(ST.getWavefrontSize()))
1081  .addReg(DiffReg);
1082  } else {
1083  unsigned ScaledReg
1084  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1085 
1086  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1087  .addImm(Log2_32(ST.getWavefrontSize()))
1088  .addReg(DiffReg, RegState::Kill);
1089 
1090  // TODO: Fold if use instruction is another add of a constant.
1091  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1092  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1093  .addImm(Offset)
1094  .addReg(ScaledReg, RegState::Kill);
1095  } else {
1096  unsigned ConstOffsetReg
1097  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1098 
1099  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1100  .addImm(Offset);
1101  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1102  .addReg(ConstOffsetReg, RegState::Kill)
1103  .addReg(ScaledReg, RegState::Kill);
1104  }
1105  }
1106 
1107  // Don't introduce an extra copy if we're just materializing in a mov.
1108  if (IsCopy)
1109  MI->eraseFromParent();
1110  else
1111  FIOp.ChangeToRegister(ResultReg, false, false, true);
1112  return;
1113  }
1114 
1115  if (IsMUBUF) {
1116  // Disable offen so we don't need a 0 vgpr base.
1117  assert(static_cast<int>(FIOperandNum) ==
1118  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1119  AMDGPU::OpName::vaddr));
1120 
1121  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1122  == MFI->getFrameOffsetReg());
1123 
1124  int64_t Offset = FrameInfo.getObjectOffset(Index);
1125  int64_t OldImm
1126  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1127  int64_t NewOffset = OldImm + Offset;
1128 
1129  if (isUInt<12>(NewOffset) &&
1130  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1131  MI->eraseFromParent();
1132  return;
1133  }
1134  }
1135 
1136  // If the offset is simply too big, don't convert to a scratch wave offset
1137  // relative index.
1138 
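 // Fall back to the raw object offset: use it as an immediate if it is
 // legal for this instruction, otherwise materialize it in a VGPR first.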
1139  int64_t Offset = FrameInfo.getObjectOffset(Index);
1140  FIOp.ChangeToImmediate(Offset);
1141  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1142  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1143  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1144  .addImm(Offset);
1145  FIOp.ChangeToRegister(TmpReg, false, false, true);
1146  }
1147  }
1148  }
1149 }
1150 
1151 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1152  #define AMDGPU_REG_ASM_NAMES
1153  #include "AMDGPURegAsmNames.inc.cpp"
1154 
1155  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1156  if (Reg >= BeginReg && Reg <= EndReg) { \
1157  unsigned Index = Reg - BeginReg; \
1158  assert(Index < array_lengthof(RegTable)); \
1159  return RegTable[Index]; \
1160  }
1161 
1162  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1163  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1164  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1165  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1166  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1167  VGPR96RegNames);
1168 
1169  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1170  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1171  VGPR128RegNames);
1172  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1173  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1174  SGPR128RegNames);
1175 
1176  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1177  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1178  VGPR256RegNames);
1179 
1180  REG_RANGE(
1181  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1182  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1183  VGPR512RegNames);
1184 
1185  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1186  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1187  SGPR256RegNames);
1188 
1189  REG_RANGE(
1190  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1191  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1192  SGPR512RegNames
1193  );
1194 
1195 #undef REG_RANGE
1196 
1197  // FIXME: Rename flat_scr so we don't need to special case this.
1198  switch (Reg) {
1199  case AMDGPU::FLAT_SCR:
1200  return "flat_scratch";
1201  case AMDGPU::FLAT_SCR_LO:
1202  return "flat_scratch_lo";
1203  case AMDGPU::FLAT_SCR_HI:
1204  return "flat_scratch_hi";
1205  default:
1206  // For the special named registers the default is fine.
1207  return AMDGPURegisterInfo::getRegAsmName(Reg);
1208  }
1209 }
1210 
1211 // FIXME: This is very slow. It might be worth creating a map from physreg to
1212 // register class.
1213 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1214  assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1215 
1216  static const TargetRegisterClass *const BaseClasses[] = {
1217  &AMDGPU::VGPR_32RegClass,
1218  &AMDGPU::SReg_32RegClass,
1219  &AMDGPU::VReg_64RegClass,
1220  &AMDGPU::SReg_64RegClass,
1221  &AMDGPU::VReg_96RegClass,
1222  &AMDGPU::VReg_128RegClass,
1223  &AMDGPU::SReg_128RegClass,
1224  &AMDGPU::VReg_256RegClass,
1225  &AMDGPU::SReg_256RegClass,
1226  &AMDGPU::VReg_512RegClass,
1227  &AMDGPU::SReg_512RegClass,
1228  &AMDGPU::SCC_CLASSRegClass,
1229  &AMDGPU::R600_Reg32RegClass,
1230  &AMDGPU::R600_PredicateRegClass,
1231  &AMDGPU::Pseudo_SReg_32RegClass,
1232  &AMDGPU::Pseudo_SReg_128RegClass,
1233  };
1234 
1235  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1236  if (BaseClass->contains(Reg)) {
1237  return BaseClass;
1238  }
1239  }
1240  return nullptr;
1241 }
1242 
1243 // TODO: It might be helpful to have some target specific flags in
1244 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1245 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1246  unsigned Size = getRegSizeInBits(*RC);
1247  if (Size < 32)
1248  return false;
1249  switch (Size) {
1250  case 32:
1251  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1252  case 64:
1253  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1254  case 96:
1255  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1256  case 128:
1257  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1258  case 256:
1259  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1260  case 512:
1261  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1262  default:
1263  llvm_unreachable("Invalid register class size");
1264  }
1265 }
1266 
1267 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1268  const TargetRegisterClass *SRC) const {
1269  switch (getRegSizeInBits(*SRC)) {
1270  case 32:
1271  return &AMDGPU::VGPR_32RegClass;
1272  case 64:
1273  return &AMDGPU::VReg_64RegClass;
1274  case 96:
1275  return &AMDGPU::VReg_96RegClass;
1276  case 128:
1277  return &AMDGPU::VReg_128RegClass;
1278  case 256:
1279  return &AMDGPU::VReg_256RegClass;
1280  case 512:
1281  return &AMDGPU::VReg_512RegClass;
1282  default:
1283  llvm_unreachable("Invalid register class size");
1284  }
1285 }
1286 
1287 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1288  const TargetRegisterClass *VRC) const {
1289  switch (getRegSizeInBits(*VRC)) {
1290  case 32:
1291  return &AMDGPU::SGPR_32RegClass;
1292  case 64:
1293  return &AMDGPU::SReg_64RegClass;
1294  case 128:
1295  return &AMDGPU::SReg_128RegClass;
1296  case 256:
1297  return &AMDGPU::SReg_256RegClass;
1298  case 512:
1299  return &AMDGPU::SReg_512RegClass;
1300  default:
1301  llvm_unreachable("Invalid register class size");
1302  }
1303 }
1304 
1305 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1306  const TargetRegisterClass *RC, unsigned SubIdx) const {
1307  if (SubIdx == AMDGPU::NoSubRegister)
1308  return RC;
1309 
1310  // We can assume that each lane corresponds to one 32-bit register.
1311  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1312  if (isSGPRClass(RC)) {
1313  switch (Count) {
1314  case 1:
1315  return &AMDGPU::SGPR_32RegClass;
1316  case 2:
1317  return &AMDGPU::SReg_64RegClass;
1318  case 4:
1319  return &AMDGPU::SReg_128RegClass;
1320  case 8:
1321  return &AMDGPU::SReg_256RegClass;
1322  case 16: /* fall-through */
1323  default:
1324  llvm_unreachable("Invalid sub-register class size");
1325  }
1326  } else {
1327  switch (Count) {
1328  case 1:
1329  return &AMDGPU::VGPR_32RegClass;
1330  case 2:
1331  return &AMDGPU::VReg_64RegClass;
1332  case 3:
1333  return &AMDGPU::VReg_96RegClass;
1334  case 4:
1335  return &AMDGPU::VReg_128RegClass;
1336  case 8:
1337  return &AMDGPU::VReg_256RegClass;
1338  case 16: /* fall-through */
1339  default:
1340  llvm_unreachable("Invalid sub-register class size");
1341  }
1342  }
1343 }
1344 
1345 bool SIRegisterInfo::shouldRewriteCopySrc(
1346  const TargetRegisterClass *DefRC,
1347  unsigned DefSubReg,
1348  const TargetRegisterClass *SrcRC,
1349  unsigned SrcSubReg) const {
1350  // We want to prefer the smallest register class possible, so we don't want to
1351  // stop and rewrite on anything that looks like a subregister
1352  // extract. Operations mostly don't care about the super register class, so we
1353  // only want to stop on the most basic of copies between the same register
1354  // class.
1355  //
1356  // e.g. if we have something like
1357  // %0 = ...
1358  // %1 = ...
1359  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1360  // %3 = COPY %2, sub0
1361  //
1362  // We want to look through the COPY to find:
1363  // => %3 = COPY %0
1364 
1365  // Plain copy.
1366  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1367 }
1368 
1369 /// \brief Returns a register that is not used at any point in the function.
1370 /// If all registers are used, then this function will return
1371 /// AMDGPU::NoRegister.
1372 unsigned
1373 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1374  const TargetRegisterClass *RC,
1375  const MachineFunction &MF) const {
1376 
1377  for (unsigned Reg : *RC)
1378  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1379  return Reg;
1380  return AMDGPU::NoRegister;
1381 }
1382 
1383 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1384  unsigned EltSize) const {
1385  if (EltSize == 4) {
1386  static const int16_t Sub0_15[] = {
1387  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1388  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1389  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1390  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1391  };
1392 
1393  static const int16_t Sub0_7[] = {
1394  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1395  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1396  };
1397 
1398  static const int16_t Sub0_3[] = {
1399  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1400  };
1401 
1402  static const int16_t Sub0_2[] = {
1403  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1404  };
1405 
1406  static const int16_t Sub0_1[] = {
1407  AMDGPU::sub0, AMDGPU::sub1,
1408  };
1409 
1410  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1411  case 32:
1412  return {};
1413  case 64:
1414  return makeArrayRef(Sub0_1);
1415  case 96:
1416  return makeArrayRef(Sub0_2);
1417  case 128:
1418  return makeArrayRef(Sub0_3);
1419  case 256:
1420  return makeArrayRef(Sub0_7);
1421  case 512:
1422  return makeArrayRef(Sub0_15);
1423  default:
1424  llvm_unreachable("unhandled register size");
1425  }
1426  }
1427 
1428  if (EltSize == 8) {
1429  static const int16_t Sub0_15_64[] = {
1430  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1431  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1432  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1433  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1434  };
1435 
1436  static const int16_t Sub0_7_64[] = {
1437  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1438  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1439  };
1440 
1441 
1442  static const int16_t Sub0_3_64[] = {
1443  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1444  };
1445 
1446  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1447  case 64:
1448  return {};
1449  case 128:
1450  return makeArrayRef(Sub0_3_64);
1451  case 256:
1452  return makeArrayRef(Sub0_7_64);
1453  case 512:
1454  return makeArrayRef(Sub0_15_64);
1455  default:
1456  llvm_unreachable("unhandled register size");
1457  }
1458  }
1459 
1460  assert(EltSize == 16 && "unhandled register spill split size");
1461 
1462  static const int16_t Sub0_15_128[] = {
1463  AMDGPU::sub0_sub1_sub2_sub3,
1464  AMDGPU::sub4_sub5_sub6_sub7,
1465  AMDGPU::sub8_sub9_sub10_sub11,
1466  AMDGPU::sub12_sub13_sub14_sub15
1467  };
1468 
1469  static const int16_t Sub0_7_128[] = {
1470  AMDGPU::sub0_sub1_sub2_sub3,
1471  AMDGPU::sub4_sub5_sub6_sub7
1472  };
1473 
1474  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1475  case 128:
1476  return {};
1477  case 256:
1478  return makeArrayRef(Sub0_7_128);
1479  case 512:
1480  return makeArrayRef(Sub0_15_128);
1481  default:
1482  llvm_unreachable("unhandled register size");
1483  }
1484 }
1485 
1486 const TargetRegisterClass*
1487 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1488  unsigned Reg) const {
1489  if (TargetRegisterInfo::isVirtualRegister(Reg))
1490  return MRI.getRegClass(Reg);
1491 
1492  return getPhysRegClass(Reg);
1493 }
1494 
1495 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1496  unsigned Reg) const {
1497  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1498  assert(RC && "Register class for the reg not found");
1499  return hasVGPRs(RC);
1500 }
1501 
1502 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1503  const TargetRegisterClass *SrcRC,
1504  unsigned SubReg,
1505  const TargetRegisterClass *DstRC,
1506  unsigned DstSubReg,
1507  const TargetRegisterClass *NewRC,
1508  LiveIntervals &LIS) const {
1509  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1510  unsigned DstSize = getRegSizeInBits(*DstRC);
1511  unsigned NewSize = getRegSizeInBits(*NewRC);
1512 
1513  // Do not increase size of registers beyond dword; we would need to allocate
1514  // adjacent registers and constrain regalloc more than needed.
1515 
1516  // Always allow dword coalescing.
1517  if (SrcSize <= 32 || DstSize <= 32)
1518  return true;
1519 
1520  return NewSize <= DstSize || NewSize <= SrcSize;
1521 }
1522 
1523 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1524  MachineFunction &MF) const {
1525 
1526  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1527  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1528 
1529  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1530  MF.getFunction());
1531  switch (RC->getID()) {
1532  default:
1533  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1534  case AMDGPU::VGPR_32RegClassID:
1535  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1536  case AMDGPU::SGPR_32RegClassID:
1537  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1538  }
1539 }
1540 
1541 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1542  unsigned Idx) const {
1543  if (Idx == getVGPRPressureSet())
1544  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1545  const_cast<MachineFunction &>(MF));
1546 
1547  if (Idx == getSGPRPressureSet())
1548  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1549  const_cast<MachineFunction &>(MF));
1550 
1551  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1552 }
1553 
1554 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1555  static const int Empty[] = { -1 };
1556 
1557  if (hasRegUnit(AMDGPU::M0, RegUnit))
1558  return Empty;
1559  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1560 }
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const override
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:398
Interface definition for SIRegisterInfo.
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
SIRegisterInfo(const SISubtarget &ST)
AMDGPU specific subclass of TargetSubtarget.
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
bool isRegUsed(unsigned Reg, bool includeReserved=true) const
Return if a specific register is currently used.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override
static int getOffsetMUBUFLoad(unsigned Opc)
StringRef getRegAsmName(unsigned Reg) const override
bool hasStackObjects() const
Return true if there are any stack objects in this function.
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static std::pair< unsigned, unsigned > getSpillEltSize(unsigned SuperRegSize, bool Store)
const SIInstrInfo * getInstrInfo() const override
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
Definition: MachineInstr.h:652
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount)
A debug info location.
Definition: DebugLoc.h:34
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const
Find an unused register of the specified register class.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:335
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override
static unsigned getNumSubRegsForSpillOp(unsigned Op)
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
unsigned getSubRegFromChannel(unsigned Channel) const
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:710
A description of a memory reference used in the backend.
bool hasInv2PiInlineImm() const
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
unsigned SubReg
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Reg
All possible values of the reg field in the ModR/M byte.
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:293
unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:398
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
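A hedged sketch of how the named-operand helpers are typically used when inspecting a MUBUF access; readMUBUFOffset is a hypothetical helper, not code from this file:

  // Read the immediate 'offset' operand of a MUBUF instruction; returns 0 if
  // the operand is absent for this opcode.
  static int64_t readMUBUFOffset(const SIInstrInfo *TII, MachineInstr &MI) {
    assert(SIInstrInfo::isMUBUF(MI));
    const MachineOperand *OffsetOp =
        TII->getNamedOperand(MI, AMDGPU::OpName::offset);
    return OffsetOp ? OffsetOp->getImm() : 0;
  }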
bool isSGPRPressureSet(unsigned SetID) const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if the spill can be placed entirely in VGPR lanes.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
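A hedged fragment showing the usual frame-index elimination pattern: once the final offset is known, the MO_FrameIndex operand is rewritten in place (MI, FIOperandNum and NewOffset are assumed to be in scope):

  MachineOperand &FIOp = MI.getOperand(FIOperandNum);
  assert(FIOp.isFI() && "expected a frame-index operand");
  FIOp.ChangeToImmediate(NewOffset);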
#define REG_RANGE(BeginReg, EndReg, RegTable)
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:602
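MinAlign returns the largest power of two dividing both arguments, i.e. the alignment guaranteed for an access at offset B into an object aligned to A. Since it is constexpr, the illustrative examples below hold at compile time:

  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::MinAlign(8, 4) == 4, "");
  static_assert(llvm::MinAlign(16, 12) == 4, "");
  static_assert(llvm::MinAlign(16, 16) == 16, "");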
bool hasScalarStores() const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
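A minimal sketch of the builder interface together with getKillRegState (listed above); emitVGPRCopy is a hypothetical helper, not code from this file:

  // Emit a 32-bit VGPR move at the insertion point, killing the source.
  static void emitVGPRCopy(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I,
                           const SIInstrInfo *TII, unsigned Dst, unsigned Src,
                           bool IsKill) {
    DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
        .addReg(Src, getKillRegState(IsKill));
  }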
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
unsigned getSGPRPressureSet() const
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
const MachineBasicBlock & front() const
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
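Floor semantics in a couple of concrete cases (note that the return type is unsigned, so the documented -1 for zero wraps around):

  #include <cassert>
  #include "llvm/Support/MathExtras.h"

  void log2Examples() {
    assert(llvm::Log2_32(1) == 0);
    assert(llvm::Log2_32(32) == 5);
    assert(llvm::Log2_32(33) == 5); // floor, not rounding
  }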
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:393
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:142
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
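A hedged sketch tying together several entries in this index (getFixedStack, getWithOffset, getObjectAlignment, MinAlign, getMachineMemOperand): describe a 4-byte store into one element of a spill slot. buildSpillMMO is a hypothetical helper, not code from this file:

  static MachineMemOperand *buildSpillMMO(MachineFunction &MF, int FI,
                                          unsigned EltIdx) {
    MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    unsigned Align = FrameInfo.getObjectAlignment(FI);
    return MF.getMachineMemOperand(PtrInfo.getWithOffset(4 * EltIdx),
                                   MachineMemOperand::MOStore, /*Size=*/4,
                                   MinAlign(Align, 4 * EltIdx));
  }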
Representation of each machine instruction.
Definition: MachineInstr.h:60
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
void reserveRegisterTuples(BitVector &, unsigned Reg) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
const unsigned Kind
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
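A hedged usage fragment: check whether a 32-bit value fits the inline-constant encoding, with the 1/(2*pi) case gated on the subtarget query hasInv2PiInlineImm listed above (Imm and ST are assumed to be in scope):

  bool CanInline = llvm::AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(Imm), ST.hasInv2PiInlineImm());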
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:298
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
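A hedged fragment combining this with isVirtualRegister from the top of this index: a freshly created register lives in the virtual namespace, unlike a physical register such as AMDGPU::VGPR0 (MF is assumed to be in scope):

  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned TmpVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  assert(TargetRegisterInfo::isVirtualRegister(TmpVGPR));
  assert(!TargetRegisterInfo::isVirtualRegister(AMDGPU::VGPR0));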
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
bool hasCalls() const
Return true if the current function has any function calls.