SIRegisterInfo.cpp (LLVM 10.0.0svn)
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/LLVMContext.h"
29 
30 using namespace llvm;
31 
32 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
33  for (unsigned i = 0; PSets[i] != -1; ++i) {
34  if (PSets[i] == (int)PSetID)
35  return true;
36  }
37  return false;
38 }
39 
40 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
41  BitVector &PressureSets) const {
42  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
43  const int *PSets = getRegUnitPressureSets(*U);
44  if (hasPressureSet(PSets, PSetID)) {
45  PressureSets.set(PSetID);
46  break;
47  }
48  }
49 }
50 
51 static cl::opt<bool> EnableSpillSGPRToVGPR(
52  "amdgpu-spill-sgpr-to-vgpr",
53  cl::desc("Enable spilling SGPRs to VGPRs"),
54  cl::ReallyHidden,
55  cl::init(true));
56 
57 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
58  AMDGPURegisterInfo(),
59  ST(ST),
60  SGPRPressureSets(getNumRegPressureSets()),
61  VGPRPressureSets(getNumRegPressureSets()),
62  AGPRPressureSets(getNumRegPressureSets()),
63  SpillSGPRToVGPR(EnableSpillSGPRToVGPR),
64  isWave32(ST.isWave32()) {
65  unsigned NumRegPressureSets = getNumRegPressureSets();
66 
67  SGPRSetID = NumRegPressureSets;
68  VGPRSetID = NumRegPressureSets;
69  AGPRSetID = NumRegPressureSets;
70 
71  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
72  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
73  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
74  classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
75  }
76 
77  // Determine the number of reg units for each pressure set.
78  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
79  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
80  const int *PSets = getRegUnitPressureSets(i);
81  for (unsigned j = 0; PSets[j] != -1; ++j) {
82  ++PressureSetRegUnits[PSets[j]];
83  }
84  }
85 
86  unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
87  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
88  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
89  VGPRSetID = i;
90  VGPRMax = PressureSetRegUnits[i];
91  continue;
92  }
93  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
94  SGPRSetID = i;
95  SGPRMax = PressureSetRegUnits[i];
96  }
97  if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
98  AGPRSetID = i;
99  AGPRMax = PressureSetRegUnits[i];
100  continue;
101  }
102  }
103 
104  assert(SGPRSetID < NumRegPressureSets &&
105  VGPRSetID < NumRegPressureSets &&
106  AGPRSetID < NumRegPressureSets);
107 }
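// Note (illustrative, not part of the original source): the loops above pick,
// for each of SGPR/VGPR/AGPR, the pressure set covering the largest number of
// register units as the canonical set ID. getRegPressureSetLimit() later maps
// these IDs back to per-function register limits, roughly:
//
//   unsigned VGPRLimit =
//       TRI->getRegPressureSetLimit(MF, TRI->getVGPRPressureSet());
//   // == min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF))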
108 
109 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
110  const MachineFunction &MF) const {
111  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
112  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
113  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
114 }
115 
116 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
117  unsigned Reg;
118 
119  // Try to place it in a hole after PrivateSegmentBufferReg.
120  if (RegCount & 3) {
121  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
122  // alignment constraints, so we have a hole where we can put the wave offset.
123  Reg = RegCount - 1;
124  } else {
125  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
126  // wave offset before it.
127  Reg = RegCount - 5;
128  }
129 
130  return Reg;
131 }
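// Worked examples (illustrative, not part of the original source): with
// RegCount = 102 the count is not a multiple of 4, so the 4-SGPR scratch
// resource descriptor sits below the aligned boundary and SGPR101
// (RegCount - 1) is free for the wave offset. With RegCount = 104 the
// descriptor occupies SGPR100..SGPR103, so the wave offset goes just below it
// at SGPR99 (RegCount - 5).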
132 
133 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
134  const MachineFunction &MF) const {
135  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
136  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
137 }
138 
139 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
140  BitVector Reserved(getNumRegs());
141 
142  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
143  // this seems likely to result in bugs, so I'm marking them as reserved.
144  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
145  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
146 
147  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
148  reserveRegisterTuples(Reserved, AMDGPU::M0);
149 
150  // Reserve src_vccz, src_execz, src_scc.
151  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
152  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
153  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
154 
155  // Reserve the memory aperture registers.
156  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
157  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
158  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
160 
161  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
162  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
163 
164  // Reserve xnack_mask registers - support is not implemented in Codegen.
165  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
166 
167  // Reserve lds_direct register - support is not implemented in Codegen.
168  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
169 
170  // Reserve Trap Handler registers - support is not implemented in Codegen.
171  reserveRegisterTuples(Reserved, AMDGPU::TBA);
172  reserveRegisterTuples(Reserved, AMDGPU::TMA);
173  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
174  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
175  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
176  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
177  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
178  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
179  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
180  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
181 
182  // Reserve null register - it shall never be allocated
183  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
184 
185  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
186  // will result in bugs.
187  if (isWave32) {
188  Reserved.set(AMDGPU::VCC);
189  Reserved.set(AMDGPU::VCC_HI);
190  }
191 
192  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
193  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
194  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
195  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
196  reserveRegisterTuples(Reserved, Reg);
197  }
198 
199  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
200  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
201  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
202  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
203  reserveRegisterTuples(Reserved, Reg);
204  Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
205  reserveRegisterTuples(Reserved, Reg);
206  }
207 
208  // Reserve all remaining AGPRs if there are no instructions that can use them.
209  if (!ST.hasMAIInsts()) {
210  for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
211  unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
212  reserveRegisterTuples(Reserved, Reg);
213  }
214  }
215 
216  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
217 
218  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
219  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
220  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
221  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
222  }
223 
224  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
225  if (ScratchRSrcReg != AMDGPU::NoRegister) {
226  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
227  // to spill.
228  // TODO: May need to reserve a VGPR if doing LDS spilling.
229  reserveRegisterTuples(Reserved, ScratchRSrcReg);
230  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
231  }
232 
233  // We have to assume the SP is needed in case there are calls in the function,
234  // which is detected after the function is lowered. If we aren't really going
235  // to need SP, don't bother reserving it.
236  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
237 
238  if (StackPtrReg != AMDGPU::NoRegister) {
239  reserveRegisterTuples(Reserved, StackPtrReg);
240  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
241  }
242 
243  unsigned FrameReg = MFI->getFrameOffsetReg();
244  if (FrameReg != AMDGPU::NoRegister) {
245  reserveRegisterTuples(Reserved, FrameReg);
246  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
247  }
248 
249  for (unsigned Reg : MFI->WWMReservedRegs) {
250  reserveRegisterTuples(Reserved, Reg);
251  }
252 
253  // FIXME: Stop using reserved registers for this.
254  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
255  reserveRegisterTuples(Reserved, Reg);
256 
257  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
258  reserveRegisterTuples(Reserved, Reg);
259 
260  return Reserved;
261 }
262 
263 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
264  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
265  // On entry, the base address is 0, so it can't possibly need any more
266  // alignment.
267 
268  // FIXME: Should be able to specify the entry frame alignment per calling
269  // convention instead.
270  if (Info->isEntryFunction())
271  return false;
272 
273  return TargetRegisterInfo::canRealignStack(MF);
274 }
275 
276 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
277  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
278  if (Info->isEntryFunction()) {
279  const MachineFrameInfo &MFI = Fn.getFrameInfo();
280  return MFI.hasStackObjects() || MFI.hasCalls();
281  }
282 
283  // May need scavenger for dealing with callee saved registers.
284  return true;
285 }
286 
287 bool SIRegisterInfo::requiresFrameIndexScavenging(
288  const MachineFunction &MF) const {
289  // Do not use frame virtual registers. They used to be used for SGPRs, but
290  // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
291  // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
292  // spill.
293  return false;
294 }
295 
296 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
297  const MachineFunction &MF) const {
298  const MachineFrameInfo &MFI = MF.getFrameInfo();
299  return MFI.hasStackObjects();
300 }
301 
302 bool SIRegisterInfo::requiresVirtualBaseRegisters(
303  const MachineFunction &) const {
304  // There are no special dedicated stack or frame pointers.
305  return true;
306 }
307 
308 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
309  // This helps catch bugs as verifier errors.
310  return true;
311 }
312 
313 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
314  assert(SIInstrInfo::isMUBUF(*MI));
315 
316  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
317  AMDGPU::OpName::offset);
318  return MI->getOperand(OffIdx).getImm();
319 }
320 
321 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
322  int Idx) const {
323  if (!SIInstrInfo::isMUBUF(*MI))
324  return 0;
325 
326  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
327  AMDGPU::OpName::vaddr) &&
328  "Should never see frame index on non-address operand");
329 
330  return getMUBUFInstrOffset(MI);
331 }
332 
333 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
334  if (!MI->mayLoadOrStore())
335  return false;
336 
337  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
338 
339  return !isUInt<12>(FullOffset);
340 }
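// Note (illustrative, not part of the original source): MUBUF instructions
// carry a 12-bit unsigned immediate offset, so the isUInt<12> check above
// fails once the combined offset reaches 4096 bytes. For example, an
// instruction offset of 8 plus a frame offset of 4090 gives 4098, which no
// longer fits and requires a materialized frame base register.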
341 
342 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
343  unsigned BaseReg,
344  int FrameIdx,
345  int64_t Offset) const {
346  MachineBasicBlock::iterator Ins = MBB->begin();
347  DebugLoc DL; // Defaults to "unknown"
348 
349  if (Ins != MBB->end())
350  DL = Ins->getDebugLoc();
351 
352  MachineFunction *MF = MBB->getParent();
353  const SIInstrInfo *TII = ST.getInstrInfo();
354 
355  if (Offset == 0) {
356  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
357  .addFrameIndex(FrameIdx);
358  return;
359  }
360 
361  MachineRegisterInfo &MRI = MF->getRegInfo();
362  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
363 
364  Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
365 
366  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
367  .addImm(Offset);
368  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
369  .addFrameIndex(FrameIdx);
370 
371  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
372  .addReg(OffsetReg, RegState::Kill)
373  .addReg(FIReg)
374  .addImm(0); // clamp bit
375 }
376 
377 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
378  int64_t Offset) const {
379  const SIInstrInfo *TII = ST.getInstrInfo();
380 
381 #ifndef NDEBUG
382  // FIXME: Is it possible to be storing a frame index to itself?
383  bool SeenFI = false;
384  for (const MachineOperand &MO: MI.operands()) {
385  if (MO.isFI()) {
386  if (SeenFI)
387  llvm_unreachable("should not see multiple frame indices");
388 
389  SeenFI = true;
390  }
391  }
392 #endif
393 
394  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
395 #ifndef NDEBUG
396  MachineBasicBlock *MBB = MI.getParent();
397  MachineFunction *MF = MBB->getParent();
398 #endif
399  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
400  assert(TII->isMUBUF(MI));
401  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
402  MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
403  "should only be seeing stack pointer offset relative FrameIndex");
404 
405  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
406  int64_t NewOffset = OffsetOp->getImm() + Offset;
407  assert(isUInt<12>(NewOffset) && "offset should be legal");
408 
409  FIOp->ChangeToRegister(BaseReg, false);
410  OffsetOp->setImm(NewOffset);
411 }
412 
413 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
414  unsigned BaseReg,
415  int64_t Offset) const {
416  if (!SIInstrInfo::isMUBUF(*MI))
417  return false;
418 
419  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
420 
421  return isUInt<12>(NewOffset);
422 }
423 
424 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
425  const MachineFunction &MF, unsigned Kind) const {
426  // This is inaccurate. It depends on the instruction and address space. The
427  // only place where we should hit this is for dealing with frame indexes /
428  // private accesses, so this is correct in that case.
429  return &AMDGPU::VGPR_32RegClass;
430 }
431 
432 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
433 
434  switch (Op) {
435  case AMDGPU::SI_SPILL_S1024_SAVE:
436  case AMDGPU::SI_SPILL_S1024_RESTORE:
437  case AMDGPU::SI_SPILL_V1024_SAVE:
438  case AMDGPU::SI_SPILL_V1024_RESTORE:
439  case AMDGPU::SI_SPILL_A1024_SAVE:
440  case AMDGPU::SI_SPILL_A1024_RESTORE:
441  return 32;
442  case AMDGPU::SI_SPILL_S512_SAVE:
443  case AMDGPU::SI_SPILL_S512_RESTORE:
444  case AMDGPU::SI_SPILL_V512_SAVE:
445  case AMDGPU::SI_SPILL_V512_RESTORE:
446  case AMDGPU::SI_SPILL_A512_SAVE:
447  case AMDGPU::SI_SPILL_A512_RESTORE:
448  return 16;
449  case AMDGPU::SI_SPILL_S256_SAVE:
450  case AMDGPU::SI_SPILL_S256_RESTORE:
451  case AMDGPU::SI_SPILL_V256_SAVE:
452  case AMDGPU::SI_SPILL_V256_RESTORE:
453  return 8;
454  case AMDGPU::SI_SPILL_S160_SAVE:
455  case AMDGPU::SI_SPILL_S160_RESTORE:
456  case AMDGPU::SI_SPILL_V160_SAVE:
457  case AMDGPU::SI_SPILL_V160_RESTORE:
458  return 5;
459  case AMDGPU::SI_SPILL_S128_SAVE:
460  case AMDGPU::SI_SPILL_S128_RESTORE:
461  case AMDGPU::SI_SPILL_V128_SAVE:
462  case AMDGPU::SI_SPILL_V128_RESTORE:
463  case AMDGPU::SI_SPILL_A128_SAVE:
464  case AMDGPU::SI_SPILL_A128_RESTORE:
465  return 4;
466  case AMDGPU::SI_SPILL_S96_SAVE:
467  case AMDGPU::SI_SPILL_S96_RESTORE:
468  case AMDGPU::SI_SPILL_V96_SAVE:
469  case AMDGPU::SI_SPILL_V96_RESTORE:
470  return 3;
471  case AMDGPU::SI_SPILL_S64_SAVE:
472  case AMDGPU::SI_SPILL_S64_RESTORE:
473  case AMDGPU::SI_SPILL_V64_SAVE:
474  case AMDGPU::SI_SPILL_V64_RESTORE:
475  case AMDGPU::SI_SPILL_A64_SAVE:
476  case AMDGPU::SI_SPILL_A64_RESTORE:
477  return 2;
478  case AMDGPU::SI_SPILL_S32_SAVE:
479  case AMDGPU::SI_SPILL_S32_RESTORE:
480  case AMDGPU::SI_SPILL_V32_SAVE:
481  case AMDGPU::SI_SPILL_V32_RESTORE:
482  case AMDGPU::SI_SPILL_A32_SAVE:
483  case AMDGPU::SI_SPILL_A32_RESTORE:
484  return 1;
485  default: llvm_unreachable("Invalid spill opcode");
486  }
487 }
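// Note (illustrative, not part of the original source): the count returned
// above is simply the width of the spilled register tuple in bits divided by
// 32; e.g. SI_SPILL_S256_SAVE covers a 256-bit SGPR tuple and is split into
// 8 dword-sized spill operations.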
488 
489 static int getOffsetMUBUFStore(unsigned Opc) {
490  switch (Opc) {
491  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
492  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
493  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
494  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
495  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
496  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
497  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
498  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
499  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
500  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
501  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
502  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
503  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
504  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
505  default:
506  return -1;
507  }
508 }
509 
510 static int getOffsetMUBUFLoad(unsigned Opc) {
511  switch (Opc) {
512  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
513  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
514  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
515  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
516  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
517  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
518  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
519  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
520  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
521  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
522  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
523  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
524  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
525  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
526  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
527  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
528  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
529  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
530  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
531  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
532  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
533  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
534  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
535  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
536  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
537  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
538  default:
539  return -1;
540  }
541 }
542 
543 static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
544  MachineBasicBlock::iterator MI,
545  int Index,
546  unsigned Lane,
547  unsigned ValueReg,
548  bool IsKill) {
549  MachineBasicBlock *MBB = MI->getParent();
550  MachineFunction *MF = MI->getParent()->getParent();
551  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
552  const SIInstrInfo *TII = ST.getInstrInfo();
553 
554  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
555 
556  if (Reg == AMDGPU::NoRegister)
557  return MachineInstrBuilder();
558 
559  bool IsStore = MI->mayStore();
560  MachineRegisterInfo &MRI = MF->getRegInfo();
561  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
562 
563  unsigned Dst = IsStore ? Reg : ValueReg;
564  unsigned Src = IsStore ? ValueReg : Reg;
565  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
566  : AMDGPU::V_ACCVGPR_READ_B32;
567 
568  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
569  .addReg(Src, getKillRegState(IsKill));
570 }
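// Note (illustrative, not part of the original source): the XOR above selects
// the accumulator copy direction. Reg is the AGPR (or VGPR) lane backing the
// spill slot and ValueReg is the register being spilled or reloaded:
//   store, Reg is an AGPR -> V_ACCVGPR_WRITE_B32 (VGPR value into the AGPR)
//   store, Reg is a VGPR  -> V_ACCVGPR_READ_B32  (AGPR value into the VGPR)
//   load,  Reg is an AGPR -> V_ACCVGPR_READ_B32  (AGPR back into the VGPR)
//   load,  Reg is a VGPR  -> V_ACCVGPR_WRITE_B32 (VGPR back into the AGPR)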
571 
572 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
573 // need to handle the case where an SGPR may need to be spilled while spilling.
574 static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
575  MachineFrameInfo &MFI,
576  MachineBasicBlock::iterator MI,
577  int Index,
578  int64_t Offset) {
579  const SIInstrInfo *TII = ST.getInstrInfo();
580  MachineBasicBlock *MBB = MI->getParent();
581  const DebugLoc &DL = MI->getDebugLoc();
582  bool IsStore = MI->mayStore();
583 
584  unsigned Opc = MI->getOpcode();
585  int LoadStoreOp = IsStore ?
586  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
587  if (LoadStoreOp == -1)
588  return false;
589 
590  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
591  if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr())
592  return true;
593 
594  MachineInstrBuilder NewMI =
595  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
596  .add(*Reg)
597  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
598  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
599  .addImm(Offset)
600  .addImm(0) // glc
601  .addImm(0) // slc
602  .addImm(0) // tfe
603  .addImm(0) // dlc
604  .addImm(0) // swz
605  .cloneMemRefs(*MI);
606 
607  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
608  AMDGPU::OpName::vdata_in);
609  if (VDataIn)
610  NewMI.add(*VDataIn);
611  return true;
612 }
613 
614 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
615  unsigned LoadStoreOp,
616  int Index,
617  unsigned ValueReg,
618  bool IsKill,
619  unsigned ScratchRsrcReg,
620  unsigned ScratchOffsetReg,
621  int64_t InstOffset,
622  MachineMemOperand *MMO,
623  RegScavenger *RS) const {
624  MachineBasicBlock *MBB = MI->getParent();
625  MachineFunction *MF = MI->getParent()->getParent();
626  const SIInstrInfo *TII = ST.getInstrInfo();
627  const MachineFrameInfo &MFI = MF->getFrameInfo();
628 
629  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
630  const DebugLoc &DL = MI->getDebugLoc();
631  bool IsStore = Desc.mayStore();
632 
633  bool Scavenged = false;
634  unsigned SOffset = ScratchOffsetReg;
635 
636  const unsigned EltSize = 4;
637  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
638  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
639  unsigned Size = NumSubRegs * EltSize;
640  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
641  int64_t ScratchOffsetRegDelta = 0;
642 
643  unsigned Align = MFI.getObjectAlignment(Index);
644  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
645 
646  Register TmpReg =
647  hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
648  : Register();
649 
650  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
651 
652  if (!isUInt<12>(Offset + Size - EltSize)) {
653  SOffset = AMDGPU::NoRegister;
654 
655  // We currently only support spilling VGPRs to EltSize boundaries, meaning
656  // we can simplify the adjustment of Offset here to just scale with
657  // WavefrontSize.
658  Offset *= ST.getWavefrontSize();
659 
660  // We don't have access to the register scavenger if this function is called
661  // during PEI::scavengeFrameVirtualRegs().
662  if (RS)
663  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
664 
665  if (SOffset == AMDGPU::NoRegister) {
666  // There are no free SGPRs, and we are already in the process of spilling
667  // VGPRs. Since we need a VGPR in order to spill SGPRs (this is true on
668  // SI/CI, and on VI it remains true until we implement spilling using
669  // scalar stores), we have no way to free up an SGPR. Our solution here is
670  // to add the offset directly to the ScratchOffset register, and then
671  // subtract the offset after the spill to return ScratchOffset to its
672  // original value.
673  SOffset = ScratchOffsetReg;
674  ScratchOffsetRegDelta = Offset;
675  } else {
676  Scavenged = true;
677  }
678 
679  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
680  .addReg(ScratchOffsetReg)
681  .addImm(Offset);
682 
683  Offset = 0;
684  }
685 
686  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
687  Register SubReg = NumSubRegs == 1
688  ? Register(ValueReg)
689  : getSubReg(ValueReg, getSubRegFromChannel(i));
690 
691  unsigned SOffsetRegState = 0;
692  unsigned SrcDstRegState = getDefRegState(!IsStore);
693  if (i + 1 == e) {
694  SOffsetRegState |= getKillRegState(Scavenged);
695  // The last implicit use carries the "Kill" flag.
696  SrcDstRegState |= getKillRegState(IsKill);
697  }
698 
699  auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill);
700 
701  if (!MIB.getInstr()) {
702  unsigned FinalReg = SubReg;
703  if (TmpReg != AMDGPU::NoRegister) {
704  if (IsStore)
705  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
706  .addReg(SubReg, getKillRegState(IsKill));
707  SubReg = TmpReg;
708  }
709 
710  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
711  MachineMemOperand *NewMMO
712  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
713  EltSize, MinAlign(Align, EltSize * i));
714 
715  MIB = BuildMI(*MBB, MI, DL, Desc)
716  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
717  .addReg(ScratchRsrcReg)
718  .addReg(SOffset, SOffsetRegState)
719  .addImm(Offset)
720  .addImm(0) // glc
721  .addImm(0) // slc
722  .addImm(0) // tfe
723  .addImm(0) // dlc
724  .addImm(0) // swz
725  .addMemOperand(NewMMO);
726 
727  if (!IsStore && TmpReg != AMDGPU::NoRegister)
728  MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
729  FinalReg)
730  .addReg(TmpReg, RegState::Kill);
731  }
732 
733  if (NumSubRegs > 1)
734  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
735  }
736 
737  if (ScratchOffsetRegDelta != 0) {
738  // Subtract the offset we added to the ScratchOffset register.
739  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
740  .addReg(ScratchOffsetReg)
741  .addImm(ScratchOffsetRegDelta);
742  }
743 }
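// Note (illustrative, not part of the original source): when the offset of
// the last dword does not fit the 12-bit MUBUF immediate, buildSpillLoadStore
// folds the offset into the scratch wave offset SGPR instead. Scratch is
// swizzled per lane, so the frame-index offset is first scaled by
// ST.getWavefrontSize() before the S_ADD_U32; if no scratch SGPR could be
// scavenged, the matching S_SUB_U32 at the end restores the original value.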
744 
745 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
746  int Index,
747  RegScavenger *RS,
748  bool OnlyToVGPR) const {
749  MachineBasicBlock *MBB = MI->getParent();
750  MachineFunction *MF = MBB->getParent();
751  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
752  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
753 
754  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
755  = MFI->getSGPRToVGPRSpills(Index);
756  bool SpillToVGPR = !VGPRSpills.empty();
757  if (OnlyToVGPR && !SpillToVGPR)
758  return false;
759 
760  const SIInstrInfo *TII = ST.getInstrInfo();
761 
762  Register SuperReg = MI->getOperand(0).getReg();
763  bool IsKill = MI->getOperand(0).isKill();
764  const DebugLoc &DL = MI->getDebugLoc();
765 
766  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
767 
768  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
769  SuperReg != MFI->getFrameOffsetReg() &&
770  SuperReg != MFI->getScratchWaveOffsetReg()));
771 
772  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
773 
774  unsigned M0CopyReg = AMDGPU::NoRegister;
775 
776  unsigned EltSize = 4;
777  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
778 
779  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
780  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
781 
782  // Scavenged temporary VGPR to use. It must be scavenged once for any number
783  // of spilled subregs.
784  Register TmpVGPR;
785 
786  // SubReg carries the "Kill" flag when SubReg == SuperReg.
787  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
788  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
789  Register SubReg =
790  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
791 
792  if (SpillToVGPR) {
793  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
794 
795  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
796  // only circumstance in which we say it is undefined is when it is the
797  // first spill to this VGPR in the first basic block.
798  bool VGPRDefined = true;
799  if (MBB == &MF->front())
800  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
801 
802  // Mark the "old value of vgpr" input undef only if this is the first sgpr
803  // spill to this specific vgpr in the first basic block.
804  BuildMI(*MBB, MI, DL,
805  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
806  Spill.VGPR)
807  .addReg(SubReg, getKillRegState(IsKill))
808  .addImm(Spill.Lane)
809  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
810 
811  // FIXME: Since this spills to another register instead of an actual
812  // frame index, we should delete the frame index when all references to
813  // it are fixed.
814  } else {
815  // XXX - Can the spill to a VGPR fail for some subregisters but not others?
816  if (OnlyToVGPR)
817  return false;
818 
819  // Spill SGPR to a frame index.
820  if (!TmpVGPR.isValid())
821  TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
822 
823  MachineInstrBuilder Mov
824  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
825  .addReg(SubReg, SubKillState);
826 
827  // There could be undef components of a spilled super register.
828  // TODO: Can we detect this and skip the spill?
829  if (NumSubRegs > 1) {
830  // The last implicit use of the SuperReg carries the "Kill" flag.
831  unsigned SuperKillState = 0;
832  if (i + 1 == e)
833  SuperKillState |= getKillRegState(IsKill);
834  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
835  }
836 
837  unsigned Align = FrameInfo.getObjectAlignment(Index);
838  MachinePointerInfo PtrInfo
839  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
840  MachineMemOperand *MMO
841  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
842  EltSize, MinAlign(Align, EltSize * i));
843  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
844  .addReg(TmpVGPR, RegState::Kill) // src
845  .addFrameIndex(Index) // vaddr
846  .addReg(MFI->getScratchRSrcReg()) // srsrc
847  .addReg(MFI->getStackPtrOffsetReg()) // soffset
848  .addImm(i * 4) // offset
849  .addMemOperand(MMO);
850  }
851  }
852 
853  if (M0CopyReg != AMDGPU::NoRegister) {
854  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
855  .addReg(M0CopyReg, RegState::Kill);
856  }
857 
858  MI->eraseFromParent();
859  MFI->addToSpilledSGPRs(NumSubRegs);
860  return true;
861 }
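// Note (illustrative, not part of the original source): spillSGPR has two
// paths. If lanes were reserved for SGPR-to-VGPR spilling, each 32-bit
// sub-register is written into a VGPR lane with V_WRITELANE_B32 and the frame
// index is never used. Otherwise each sub-register is copied through a
// scavenged VGPR and stored to the stack slot with SI_SPILL_V32_SAVE;
// restoreSGPR below mirrors this with V_READLANE_B32 and V_READFIRSTLANE_B32.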
862 
863 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
864  int Index,
865  RegScavenger *RS,
866  bool OnlyToVGPR) const {
867  MachineFunction *MF = MI->getParent()->getParent();
868  MachineBasicBlock *MBB = MI->getParent();
869  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
870 
871  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
872  = MFI->getSGPRToVGPRSpills(Index);
873  bool SpillToVGPR = !VGPRSpills.empty();
874  if (OnlyToVGPR && !SpillToVGPR)
875  return false;
876 
877  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
878  const SIInstrInfo *TII = ST.getInstrInfo();
879  const DebugLoc &DL = MI->getDebugLoc();
880 
881  Register SuperReg = MI->getOperand(0).getReg();
882 
883  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
884 
885  unsigned M0CopyReg = AMDGPU::NoRegister;
886 
887  unsigned EltSize = 4;
888 
889  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
890 
891  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
892  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
893 
894  Register TmpVGPR;
895 
896  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
897  Register SubReg =
898  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
899 
900  if (SpillToVGPR) {
901  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
902  auto MIB =
903  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
904  SubReg)
905  .addReg(Spill.VGPR)
906  .addImm(Spill.Lane);
907 
908  if (NumSubRegs > 1 && i == 0)
909  MIB.addReg(SuperReg, RegState::ImplicitDefine);
910  } else {
911  if (OnlyToVGPR)
912  return false;
913 
914  // Restore SGPR from a stack slot.
915  // FIXME: We should use S_LOAD_DWORD here for VI.
916  if (!TmpVGPR.isValid())
917  TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
918  unsigned Align = FrameInfo.getObjectAlignment(Index);
919 
920  MachinePointerInfo PtrInfo
921  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
922 
923  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
924  MachineMemOperand::MOLoad, EltSize,
925  MinAlign(Align, EltSize * i));
926 
927  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
928  .addFrameIndex(Index) // vaddr
929  .addReg(MFI->getScratchRSrcReg()) // srsrc
930  .addReg(MFI->getStackPtrOffsetReg()) // soffset
931  .addImm(i * 4) // offset
932  .addMemOperand(MMO);
933 
934  auto MIB =
935  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
936  .addReg(TmpVGPR, RegState::Kill);
937 
938  if (NumSubRegs > 1)
939  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
940  }
941  }
942 
943  if (M0CopyReg != AMDGPU::NoRegister) {
944  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
945  .addReg(M0CopyReg, RegState::Kill);
946  }
947 
948  MI->eraseFromParent();
949  return true;
950 }
951 
952 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
953 /// a VGPR and the stack slot can be safely eliminated when all other users are
954 /// handled.
955 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
956  MachineBasicBlock::iterator MI,
957  int FI,
958  RegScavenger *RS) const {
959  switch (MI->getOpcode()) {
960  case AMDGPU::SI_SPILL_S1024_SAVE:
961  case AMDGPU::SI_SPILL_S512_SAVE:
962  case AMDGPU::SI_SPILL_S256_SAVE:
963  case AMDGPU::SI_SPILL_S160_SAVE:
964  case AMDGPU::SI_SPILL_S128_SAVE:
965  case AMDGPU::SI_SPILL_S96_SAVE:
966  case AMDGPU::SI_SPILL_S64_SAVE:
967  case AMDGPU::SI_SPILL_S32_SAVE:
968  return spillSGPR(MI, FI, RS, true);
969  case AMDGPU::SI_SPILL_S1024_RESTORE:
970  case AMDGPU::SI_SPILL_S512_RESTORE:
971  case AMDGPU::SI_SPILL_S256_RESTORE:
972  case AMDGPU::SI_SPILL_S160_RESTORE:
973  case AMDGPU::SI_SPILL_S128_RESTORE:
974  case AMDGPU::SI_SPILL_S96_RESTORE:
975  case AMDGPU::SI_SPILL_S64_RESTORE:
976  case AMDGPU::SI_SPILL_S32_RESTORE:
977  return restoreSGPR(MI, FI, RS, true);
978  default:
979  llvm_unreachable("not an SGPR spill instruction");
980  }
981 }
982 
983 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
984  int SPAdj, unsigned FIOperandNum,
985  RegScavenger *RS) const {
986  MachineFunction *MF = MI->getParent()->getParent();
987  MachineBasicBlock *MBB = MI->getParent();
988  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
989  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
990  const SIInstrInfo *TII = ST.getInstrInfo();
991  DebugLoc DL = MI->getDebugLoc();
992 
993  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
994 
995  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
996  int Index = MI->getOperand(FIOperandNum).getIndex();
997 
998  Register FrameReg = getFrameRegister(*MF);
999 
1000  switch (MI->getOpcode()) {
1001  // SGPR register spill
1002  case AMDGPU::SI_SPILL_S1024_SAVE:
1003  case AMDGPU::SI_SPILL_S512_SAVE:
1004  case AMDGPU::SI_SPILL_S256_SAVE:
1005  case AMDGPU::SI_SPILL_S160_SAVE:
1006  case AMDGPU::SI_SPILL_S128_SAVE:
1007  case AMDGPU::SI_SPILL_S96_SAVE:
1008  case AMDGPU::SI_SPILL_S64_SAVE:
1009  case AMDGPU::SI_SPILL_S32_SAVE: {
1010  spillSGPR(MI, Index, RS);
1011  break;
1012  }
1013 
1014  // SGPR register restore
1015  case AMDGPU::SI_SPILL_S1024_RESTORE:
1016  case AMDGPU::SI_SPILL_S512_RESTORE:
1017  case AMDGPU::SI_SPILL_S256_RESTORE:
1018  case AMDGPU::SI_SPILL_S160_RESTORE:
1019  case AMDGPU::SI_SPILL_S128_RESTORE:
1020  case AMDGPU::SI_SPILL_S96_RESTORE:
1021  case AMDGPU::SI_SPILL_S64_RESTORE:
1022  case AMDGPU::SI_SPILL_S32_RESTORE: {
1023  restoreSGPR(MI, Index, RS);
1024  break;
1025  }
1026 
1027  // VGPR register spill
1028  case AMDGPU::SI_SPILL_V1024_SAVE:
1029  case AMDGPU::SI_SPILL_V512_SAVE:
1030  case AMDGPU::SI_SPILL_V256_SAVE:
1031  case AMDGPU::SI_SPILL_V160_SAVE:
1032  case AMDGPU::SI_SPILL_V128_SAVE:
1033  case AMDGPU::SI_SPILL_V96_SAVE:
1034  case AMDGPU::SI_SPILL_V64_SAVE:
1035  case AMDGPU::SI_SPILL_V32_SAVE:
1036  case AMDGPU::SI_SPILL_A1024_SAVE:
1037  case AMDGPU::SI_SPILL_A512_SAVE:
1038  case AMDGPU::SI_SPILL_A128_SAVE:
1039  case AMDGPU::SI_SPILL_A64_SAVE:
1040  case AMDGPU::SI_SPILL_A32_SAVE: {
1041  const MachineOperand *VData = TII->getNamedOperand(*MI,
1042  AMDGPU::OpName::vdata);
1043  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1044  MFI->getStackPtrOffsetReg());
1045 
1046  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1047  Index,
1048  VData->getReg(), VData->isKill(),
1049  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1050  FrameReg,
1051  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1052  *MI->memoperands_begin(),
1053  RS);
1054  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1055  MI->eraseFromParent();
1056  break;
1057  }
1058  case AMDGPU::SI_SPILL_V32_RESTORE:
1059  case AMDGPU::SI_SPILL_V64_RESTORE:
1060  case AMDGPU::SI_SPILL_V96_RESTORE:
1061  case AMDGPU::SI_SPILL_V128_RESTORE:
1062  case AMDGPU::SI_SPILL_V160_RESTORE:
1063  case AMDGPU::SI_SPILL_V256_RESTORE:
1064  case AMDGPU::SI_SPILL_V512_RESTORE:
1065  case AMDGPU::SI_SPILL_V1024_RESTORE:
1066  case AMDGPU::SI_SPILL_A32_RESTORE:
1067  case AMDGPU::SI_SPILL_A64_RESTORE:
1068  case AMDGPU::SI_SPILL_A128_RESTORE:
1069  case AMDGPU::SI_SPILL_A512_RESTORE:
1070  case AMDGPU::SI_SPILL_A1024_RESTORE: {
1071  const MachineOperand *VData = TII->getNamedOperand(*MI,
1072  AMDGPU::OpName::vdata);
1073  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1074  MFI->getStackPtrOffsetReg());
1075 
1076  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1077  Index,
1078  VData->getReg(), VData->isKill(),
1079  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1080  FrameReg,
1081  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1082  *MI->memoperands_begin(),
1083  RS);
1084  MI->eraseFromParent();
1085  break;
1086  }
1087 
1088  default: {
1089  const DebugLoc &DL = MI->getDebugLoc();
1090  bool IsMUBUF = TII->isMUBUF(*MI);
1091 
1092  if (!IsMUBUF && !MFI->isEntryFunction()) {
1093  // Convert to an absolute stack address by finding the offset from the
1094  // scratch wave base and scaling by the wave size.
1095  //
1096  // In an entry function/kernel the offset is already the absolute
1097  // address relative to the frame register.
1098 
1099  Register TmpDiffReg =
1100  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1101 
1102  // If there's no free SGPR, in-place modify the FP
1103  Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
1104 
1105  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1106  Register ResultReg = IsCopy ?
1107  MI->getOperand(0).getReg() :
1108  RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1109 
1110  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1111  .addReg(FrameReg)
1112  .addReg(MFI->getScratchWaveOffsetReg());
1113 
1114  int64_t Offset = FrameInfo.getObjectOffset(Index);
1115  if (Offset == 0) {
1116  // XXX - This never happens because of emergency scavenging slot at 0?
1117  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1118  .addImm(ST.getWavefrontSizeLog2())
1119  .addReg(DiffReg);
1120  } else {
1121  if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
1122  Register ScaledReg =
1123  RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0);
1124 
1125  BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
1126  ScaledReg)
1127  .addImm(ST.getWavefrontSizeLog2())
1128  .addReg(DiffReg, RegState::Kill);
1129 
1130  const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
1131 
1132  // TODO: Fold if use instruction is another add of a constant.
1133  if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1134  // FIXME: This can fail
1135  MIB.addImm(Offset);
1136  MIB.addReg(ScaledReg, RegState::Kill);
1137  if (!IsVOP2)
1138  MIB.addImm(0); // clamp bit
1139  } else {
1140  Register ConstOffsetReg =
1141  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false);
1142 
1143  // This should always be able to use the unused carry out.
1144  assert(ConstOffsetReg && "this scavenge should not be able to fail");
1145 
1146  BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1147  .addImm(Offset);
1148  MIB.addReg(ConstOffsetReg, RegState::Kill);
1149  MIB.addReg(ScaledReg, RegState::Kill);
1150  MIB.addImm(0); // clamp bit
1151  }
1152  } else {
1153  // We have to produce a carry out, but there isn't a free SGPR
1154  // pair for it. We can keep the whole computation on the SALU to
1155  // avoid clobbering an additional register at the cost of an extra
1156  // mov.
1157 
1158  // We may have 1 free scratch SGPR even though a carry out is
1159  // unavailable. Only one additional mov is needed.
1160  Register TmpScaledReg =
1161  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1162  Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg;
1163 
1164  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
1165  .addReg(DiffReg, RegState::Kill)
1166  .addImm(ST.getWavefrontSizeLog2());
1167  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
1168  .addReg(ScaledReg, RegState::Kill)
1169  .addImm(Offset);
1170  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
1171  .addReg(ScaledReg, RegState::Kill);
1172 
1173  // If there were truly no free SGPRs, we need to undo everything.
1174  if (!TmpScaledReg.isValid()) {
1175  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
1176  .addReg(ScaledReg, RegState::Kill)
1177  .addImm(Offset);
1178  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
1179  .addReg(DiffReg, RegState::Kill)
1180  .addImm(ST.getWavefrontSizeLog2());
1181  }
1182  }
1183  }
1184 
1185  if (!TmpDiffReg.isValid()) {
1186  // Restore the FP.
1187  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
1188  .addReg(FrameReg)
1189  .addReg(MFI->getScratchWaveOffsetReg());
1190  }
1191 
1192  // Don't introduce an extra copy if we're just materializing in a mov.
1193  if (IsCopy)
1194  MI->eraseFromParent();
1195  else
1196  FIOp.ChangeToRegister(ResultReg, false, false, true);
1197  return;
1198  }
1199 
1200  if (IsMUBUF) {
1201  // Disable offen so we don't need a 0 vgpr base.
1202  assert(static_cast<int>(FIOperandNum) ==
1203  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1204  AMDGPU::OpName::vaddr));
1205 
1206  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1207  MFI->getStackPtrOffsetReg());
1208 
1209  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
1210 
1211  int64_t Offset = FrameInfo.getObjectOffset(Index);
1212  int64_t OldImm
1213  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1214  int64_t NewOffset = OldImm + Offset;
1215 
1216  if (isUInt<12>(NewOffset) &&
1217  buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
1218  MI->eraseFromParent();
1219  return;
1220  }
1221  }
1222 
1223  // If the offset is simply too big, don't convert to a scratch wave offset
1224  // relative index.
1225 
1226  int64_t Offset = FrameInfo.getObjectOffset(Index);
1227  FIOp.ChangeToImmediate(Offset);
1228  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1229  Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1230  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1231  .addImm(Offset);
1232  FIOp.ChangeToRegister(TmpReg, false, false, true);
1233  }
1234  }
1235  }
1236 }
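// Note (illustrative, not part of the original source): for a non-MUBUF frame
// index in a non-entry function, the default case above materializes the
// absolute scratch address roughly as
//   ResultReg = ((FrameReg - ScratchWaveOffset) >> WavefrontSizeLog2)
//               + ObjectOffset
// using VALU adds when a carry-out register pair can be scavenged, or an SALU
// sequence that temporarily modifies and then restores the frame pointer when
// it cannot.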
1237 
1238 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1239  return AMDGPUInstPrinter::getRegisterName(Reg);
1240 }
1241 
1242 // FIXME: This is very slow. It might be worth creating a map from physreg to
1243 // register class.
1244 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1245  assert(!Register::isVirtualRegister(Reg));
1246 
1247  static const TargetRegisterClass *const BaseClasses[] = {
1248  &AMDGPU::VGPR_32RegClass,
1249  &AMDGPU::SReg_32RegClass,
1250  &AMDGPU::AGPR_32RegClass,
1251  &AMDGPU::VReg_64RegClass,
1252  &AMDGPU::SReg_64RegClass,
1253  &AMDGPU::AReg_64RegClass,
1254  &AMDGPU::VReg_96RegClass,
1255  &AMDGPU::SReg_96RegClass,
1256  &AMDGPU::VReg_128RegClass,
1257  &AMDGPU::SReg_128RegClass,
1258  &AMDGPU::AReg_128RegClass,
1259  &AMDGPU::VReg_160RegClass,
1260  &AMDGPU::SReg_160RegClass,
1261  &AMDGPU::VReg_256RegClass,
1262  &AMDGPU::SReg_256RegClass,
1263  &AMDGPU::VReg_512RegClass,
1264  &AMDGPU::SReg_512RegClass,
1265  &AMDGPU::AReg_512RegClass,
1266  &AMDGPU::SReg_1024RegClass,
1267  &AMDGPU::VReg_1024RegClass,
1268  &AMDGPU::AReg_1024RegClass,
1269  &AMDGPU::SCC_CLASSRegClass,
1270  &AMDGPU::Pseudo_SReg_32RegClass,
1271  &AMDGPU::Pseudo_SReg_128RegClass,
1272  };
1273 
1274  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1275  if (BaseClass->contains(Reg)) {
1276  return BaseClass;
1277  }
1278  }
1279  return nullptr;
1280 }
1281 
1282 // TODO: It might be helpful to have some target specific flags in
1283 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1284 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1285  unsigned Size = getRegSizeInBits(*RC);
1286  switch (Size) {
1287  case 32:
1288  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1289  case 64:
1290  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1291  case 96:
1292  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1293  case 128:
1294  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1295  case 160:
1296  return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1297  case 256:
1298  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1299  case 512:
1300  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1301  case 1024:
1302  return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
1303  case 1:
1304  return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr;
1305  default:
1306  assert(Size < 32 && "Invalid register class size");
1307  return false;
1308  }
1309 }
1310 
1311 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
1312  unsigned Size = getRegSizeInBits(*RC);
1313  if (Size < 32)
1314  return false;
1315  switch (Size) {
1316  case 32:
1317  return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
1318  case 64:
1319  return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
1320  case 96:
1321  return false;
1322  case 128:
1323  return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
1324  case 160:
1325  case 256:
1326  return false;
1327  case 512:
1328  return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
1329  case 1024:
1330  return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
1331  default:
1332  llvm_unreachable("Invalid register class size");
1333  }
1334 }
1335 
1336 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1337  const TargetRegisterClass *SRC) const {
1338  switch (getRegSizeInBits(*SRC)) {
1339  case 32:
1340  return &AMDGPU::VGPR_32RegClass;
1341  case 64:
1342  return &AMDGPU::VReg_64RegClass;
1343  case 96:
1344  return &AMDGPU::VReg_96RegClass;
1345  case 128:
1346  return &AMDGPU::VReg_128RegClass;
1347  case 160:
1348  return &AMDGPU::VReg_160RegClass;
1349  case 256:
1350  return &AMDGPU::VReg_256RegClass;
1351  case 512:
1352  return &AMDGPU::VReg_512RegClass;
1353  case 1024:
1354  return &AMDGPU::VReg_1024RegClass;
1355  case 1:
1356  return &AMDGPU::VReg_1RegClass;
1357  default:
1358  llvm_unreachable("Invalid register class size");
1359  }
1360 }
1361 
1362 const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
1363  const TargetRegisterClass *SRC) const {
1364  switch (getRegSizeInBits(*SRC)) {
1365  case 32:
1366  return &AMDGPU::AGPR_32RegClass;
1367  case 64:
1368  return &AMDGPU::AReg_64RegClass;
1369  case 128:
1370  return &AMDGPU::AReg_128RegClass;
1371  case 512:
1372  return &AMDGPU::AReg_512RegClass;
1373  case 1024:
1374  return &AMDGPU::AReg_1024RegClass;
1375  default:
1376  llvm_unreachable("Invalid register class size");
1377  }
1378 }
1379 
1380 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1381  const TargetRegisterClass *VRC) const {
1382  switch (getRegSizeInBits(*VRC)) {
1383  case 32:
1384  return &AMDGPU::SGPR_32RegClass;
1385  case 64:
1386  return &AMDGPU::SReg_64RegClass;
1387  case 96:
1388  return &AMDGPU::SReg_96RegClass;
1389  case 128:
1390  return &AMDGPU::SGPR_128RegClass;
1391  case 160:
1392  return &AMDGPU::SReg_160RegClass;
1393  case 256:
1394  return &AMDGPU::SReg_256RegClass;
1395  case 512:
1396  return &AMDGPU::SReg_512RegClass;
1397  case 1024:
1398  return &AMDGPU::SReg_1024RegClass;
1399  default:
1400  llvm_unreachable("Invalid register class size");
1401  }
1402 }
1403 
1404 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1405  const TargetRegisterClass *RC, unsigned SubIdx) const {
1406  if (SubIdx == AMDGPU::NoSubRegister)
1407  return RC;
1408 
1409  // We can assume that each lane corresponds to one 32-bit register.
1410  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1411  if (isSGPRClass(RC)) {
1412  switch (Count) {
1413  case 1:
1414  return &AMDGPU::SGPR_32RegClass;
1415  case 2:
1416  return &AMDGPU::SReg_64RegClass;
1417  case 3:
1418  return &AMDGPU::SReg_96RegClass;
1419  case 4:
1420  return &AMDGPU::SGPR_128RegClass;
1421  case 5:
1422  return &AMDGPU::SReg_160RegClass;
1423  case 8:
1424  return &AMDGPU::SReg_256RegClass;
1425  case 16:
1426  return &AMDGPU::SReg_512RegClass;
1427  case 32: /* fall-through */
1428  default:
1429  llvm_unreachable("Invalid sub-register class size");
1430  }
1431  } else if (hasAGPRs(RC)) {
1432  switch (Count) {
1433  case 1:
1434  return &AMDGPU::AGPR_32RegClass;
1435  case 2:
1436  return &AMDGPU::AReg_64RegClass;
1437  case 4:
1438  return &AMDGPU::AReg_128RegClass;
1439  case 16:
1440  return &AMDGPU::AReg_512RegClass;
1441  case 32: /* fall-through */
1442  default:
1443  llvm_unreachable("Invalid sub-register class size");
1444  }
1445  } else {
1446  switch (Count) {
1447  case 1:
1448  return &AMDGPU::VGPR_32RegClass;
1449  case 2:
1450  return &AMDGPU::VReg_64RegClass;
1451  case 3:
1452  return &AMDGPU::VReg_96RegClass;
1453  case 4:
1454  return &AMDGPU::VReg_128RegClass;
1455  case 5:
1456  return &AMDGPU::VReg_160RegClass;
1457  case 8:
1458  return &AMDGPU::VReg_256RegClass;
1459  case 16:
1460  return &AMDGPU::VReg_512RegClass;
1461  case 32: /* fall-through */
1462  default:
1463  llvm_unreachable("Invalid sub-register class size");
1464  }
1465  }
1466 }
1467 
1468 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
1469  if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
1470  OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
1471  return !ST.hasMFMAInlineLiteralBug();
1472 
1473  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1474  OpType <= AMDGPU::OPERAND_SRC_LAST;
1475 }
1476 
1477 bool SIRegisterInfo::shouldRewriteCopySrc(
1478  const TargetRegisterClass *DefRC,
1479  unsigned DefSubReg,
1480  const TargetRegisterClass *SrcRC,
1481  unsigned SrcSubReg) const {
1482  // We want to prefer the smallest register class possible, so we don't want to
1483  // stop and rewrite on anything that looks like a subregister
1484  // extract. Operations mostly don't care about the super register class, so we
1485  // only want to stop on the most basic of copies between the same register
1486  // class.
1487  //
1488  // e.g. if we have something like
1489  // %0 = ...
1490  // %1 = ...
1491  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1492  // %3 = COPY %2, sub0
1493  //
1494  // We want to look through the COPY to find:
1495  // => %3 = COPY %0
1496 
1497  // Plain copy.
1498  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1499 }
1500 
1501 /// Returns a register that is not used at any point in the function.
1502 /// If all registers are used, then this function will return
1503 /// AMDGPU::NoRegister.
1504 unsigned
1505 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1506  const TargetRegisterClass *RC,
1507  const MachineFunction &MF) const {
1508 
1509  for (unsigned Reg : *RC)
1510  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1511  return Reg;
1512  return AMDGPU::NoRegister;
1513 }
1514 
1515 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1516  unsigned EltSize) const {
1517  if (EltSize == 4) {
1518  static const int16_t Sub0_31[] = {
1519  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1520  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1521  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1522  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1523  AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
1524  AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
1525  AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
1526  AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
1527  };
1528 
1529  static const int16_t Sub0_15[] = {
1530  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1531  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1532  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1533  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1534  };
1535 
1536  static const int16_t Sub0_7[] = {
1537  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1538  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1539  };
1540 
1541  static const int16_t Sub0_4[] = {
1542  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1543  };
1544 
1545  static const int16_t Sub0_3[] = {
1546  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1547  };
1548 
1549  static const int16_t Sub0_2[] = {
1550  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1551  };
1552 
1553  static const int16_t Sub0_1[] = {
1554  AMDGPU::sub0, AMDGPU::sub1,
1555  };
1556 
1557  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1558  case 32:
1559  return {};
1560  case 64:
1561  return makeArrayRef(Sub0_1);
1562  case 96:
1563  return makeArrayRef(Sub0_2);
1564  case 128:
1565  return makeArrayRef(Sub0_3);
1566  case 160:
1567  return makeArrayRef(Sub0_4);
1568  case 256:
1569  return makeArrayRef(Sub0_7);
1570  case 512:
1571  return makeArrayRef(Sub0_15);
1572  case 1024:
1573  return makeArrayRef(Sub0_31);
1574  default:
1575  llvm_unreachable("unhandled register size");
1576  }
1577  }
1578 
1579  if (EltSize == 8) {
1580  static const int16_t Sub0_31_64[] = {
1581  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1582  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1583  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1584  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1585  AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
1586  AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
1587  AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
1588  AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
1589  };
1590 
1591  static const int16_t Sub0_15_64[] = {
1592  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1593  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1594  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1595  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1596  };
1597 
1598  static const int16_t Sub0_7_64[] = {
1599  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1600  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1601  };
1602 
1603 
1604  static const int16_t Sub0_3_64[] = {
1605  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1606  };
1607 
1608  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1609  case 64:
1610  return {};
1611  case 128:
1612  return makeArrayRef(Sub0_3_64);
1613  case 256:
1614  return makeArrayRef(Sub0_7_64);
1615  case 512:
1616  return makeArrayRef(Sub0_15_64);
1617  case 1024:
1618  return makeArrayRef(Sub0_31_64);
1619  default:
1620  llvm_unreachable("unhandled register size");
1621  }
1622  }
1623 
1624  if (EltSize == 16) {
1625 
1626  static const int16_t Sub0_31_128[] = {
1627  AMDGPU::sub0_sub1_sub2_sub3,
1628  AMDGPU::sub4_sub5_sub6_sub7,
1629  AMDGPU::sub8_sub9_sub10_sub11,
1630  AMDGPU::sub12_sub13_sub14_sub15,
1631  AMDGPU::sub16_sub17_sub18_sub19,
1632  AMDGPU::sub20_sub21_sub22_sub23,
1633  AMDGPU::sub24_sub25_sub26_sub27,
1634  AMDGPU::sub28_sub29_sub30_sub31
1635  };
1636 
1637  static const int16_t Sub0_15_128[] = {
1638  AMDGPU::sub0_sub1_sub2_sub3,
1639  AMDGPU::sub4_sub5_sub6_sub7,
1640  AMDGPU::sub8_sub9_sub10_sub11,
1641  AMDGPU::sub12_sub13_sub14_sub15
1642  };
1643 
1644  static const int16_t Sub0_7_128[] = {
1645  AMDGPU::sub0_sub1_sub2_sub3,
1646  AMDGPU::sub4_sub5_sub6_sub7
1647  };
1648 
1649  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1650  case 128:
1651  return {};
1652  case 256:
1653  return makeArrayRef(Sub0_7_128);
1654  case 512:
1655  return makeArrayRef(Sub0_15_128);
1656  case 1024:
1657  return makeArrayRef(Sub0_31_128);
1658  default:
1659  llvm_unreachable("unhandled register size");
1660  }
1661  }
1662 
1663  assert(EltSize == 32 && "unhandled elt size");
1664 
1665  static const int16_t Sub0_31_256[] = {
1666  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1667  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1668  AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1669  AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1670  };
1671 
1672  static const int16_t Sub0_15_256[] = {
1673  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1674  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1675  };
1676 
1677  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1678  case 256:
1679  return {};
1680  case 512:
1681  return makeArrayRef(Sub0_15_256);
1682  case 1024:
1683  return makeArrayRef(Sub0_31_256);
1684  default:
1685  llvm_unreachable("unhandled register size");
1686  }
1687 }
1688 
1689 const TargetRegisterClass*
1690 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1691  unsigned Reg) const {
1692  if (Register::isVirtualRegister(Reg))
1693  return MRI.getRegClass(Reg);
1694 
1695  return getPhysRegClass(Reg);
1696 }
1697 
1698 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1699  unsigned Reg) const {
1700  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1701  assert(RC && "Register class for the reg not found");
1702  return hasVGPRs(RC);
1703 }
1704 
1705 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
1706  unsigned Reg) const {
1707  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1708  assert(RC && "Register class for the reg not found");
1709  return hasAGPRs(RC);
1710 }
1711 
1712 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1713  const TargetRegisterClass *SrcRC,
1714  unsigned SubReg,
1715  const TargetRegisterClass *DstRC,
1716  unsigned DstSubReg,
1717  const TargetRegisterClass *NewRC,
1718  LiveIntervals &LIS) const {
1719  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1720  unsigned DstSize = getRegSizeInBits(*DstRC);
1721  unsigned NewSize = getRegSizeInBits(*NewRC);
1722 
1723  // Do not increase the size of registers beyond a dword; we would need to
1724  // allocate adjacent registers and constrain regalloc more than needed.
1725 
1726  // Always allow dword coalescing.
1727  if (SrcSize <= 32 || DstSize <= 32)
1728  return true;
1729 
1730  return NewSize <= DstSize || NewSize <= SrcSize;
1731 }
1732 
1733 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1734  MachineFunction &MF) const {
1735  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1736 
1737  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1738  MF.getFunction());
1739  switch (RC->getID()) {
1740  default:
1741  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1742  case AMDGPU::VGPR_32RegClassID:
1743  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1744  case AMDGPU::SGPR_32RegClassID:
1745  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1746  }
1747 }
1748 
1749 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1750  unsigned Idx) const {
1751  if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
1752  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1753  const_cast<MachineFunction &>(MF));
1754 
1755  if (Idx == getSGPRPressureSet())
1756  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1757  const_cast<MachineFunction &>(MF));
1758 
1759  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1760 }
1761 
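As a usage sketch (assumed TRI/MF variables, not code from this file), a scheduling heuristic could query the VGPR budget for a function like this:

   unsigned VGPRLimit =
       TRI->getRegPressureSetLimit(MF, TRI->getVGPRPressureSet());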
1762 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1763  static const int Empty[] = { -1 };
1764 
1765  if (hasRegUnit(AMDGPU::M0, RegUnit))
1766  return Empty;
1767  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1768 }
1769 
1770 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1771  // Not a callee saved register.
1772  return AMDGPU::SGPR30_SGPR31;
1773 }
1774 
1775 const TargetRegisterClass *
1776 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
1777  const RegisterBank &RB,
1778  const MachineRegisterInfo &MRI) const {
1779  switch (Size) {
1780  case 1: {
1781  switch (RB.getID()) {
1782  case AMDGPU::VGPRRegBankID:
1783  return &AMDGPU::VGPR_32RegClass;
1784  case AMDGPU::VCCRegBankID:
1785  return isWave32 ?
1786  &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
1787  case AMDGPU::SGPRRegBankID:
1788  return &AMDGPU::SReg_32RegClass;
1789  case AMDGPU::SCCRegBankID:
1790  // This needs to return an allocatable class, so don't bother returning
1791  // the dummy SCC class.
1792  //
1793  // FIXME: This is a grotesque hack. We use SGPR_32 as an indication this
1794  // was not a VCC bank value since we use the larger class SReg_32 for
1795  // other values. These should all use SReg_32.
1796  return &AMDGPU::SGPR_32RegClass;
1797  default:
1798  llvm_unreachable("unknown register bank");
1799  }
1800  }
1801  case 32:
1802  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1803  &AMDGPU::SReg_32RegClass;
1804  case 64:
1805  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1806  &AMDGPU::SReg_64_XEXECRegClass;
1807  case 96:
1808  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1809  &AMDGPU::SReg_96RegClass;
1810  case 128:
1811  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1812  &AMDGPU::SGPR_128RegClass;
1813  case 160:
1814  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1815  &AMDGPU::SReg_160RegClass;
1816  case 256:
1817  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1818  &AMDGPU::SReg_256RegClass;
1819  case 512:
1820  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1821  &AMDGPU::SReg_512RegClass;
1822  case 1024:
1823  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass :
1824  &AMDGPU::SReg_1024RegClass;
1825  default:
1826  if (Size < 32)
1827  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1828  &AMDGPU::SReg_32RegClass;
1829  return nullptr;
1830  }
1831 }
1832 
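For illustration (assumed names, not code from this file), a GlobalISel-style caller might use this to pick a concrete class for a generic virtual register from its assigned bank and type size:

   if (const RegisterBank *RB = MRI.getRegBankOrNull(Reg)) {
     LLT Ty = MRI.getType(Reg);
     if (const TargetRegisterClass *RC =
             TRI->getRegClassForSizeOnBank(Ty.getSizeInBits(), *RB, MRI))
       MRI.setRegClass(Reg, RC);
   }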
1833 const TargetRegisterClass *
1834 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1835  const MachineRegisterInfo &MRI) const {
1836  const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
1837  if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
1838  return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
1839 
1840  const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
1841  return getAllocatableClass(RC);
1842 }
1843 
1844 unsigned SIRegisterInfo::getVCC() const {
1845  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
1846 }
1847 
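A small wave-size-agnostic sketch (assumed MRI/TRI context): getVCC pairs naturally with getBoolRC, declared in SIRegisterInfo.h, when materializing condition registers.

   Register CondReg = MRI.createVirtualRegister(TRI->getBoolRC());
   Register VCCReg = TRI->getVCC(); // VCC_LO in wave32, VCC in wave64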
1848 const TargetRegisterClass *
1849 SIRegisterInfo::getRegClass(unsigned RCID) const {
1850  switch ((int)RCID) {
1851  case AMDGPU::SReg_1RegClassID:
1852  return getBoolRC();
1853  case AMDGPU::SReg_1_XEXECRegClassID:
1854  return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
1855  : &AMDGPU::SReg_64_XEXECRegClass;
1856  case -1:
1857  return nullptr;
1858  default:
1859  return AMDGPURegisterInfo::getRegClass(RCID);
1860  }
1861 }
1862 
1863 // Find the reaching register definition.
1864 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1865  MachineInstr &Use,
1866  MachineRegisterInfo &MRI,
1867  LiveIntervals *LIS) const {
1868  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1869  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1870  SlotIndex DefIdx;
1871 
1872  if (Register::isVirtualRegister(Reg)) {
1873  if (!LIS->hasInterval(Reg))
1874  return nullptr;
1875  LiveInterval &LI = LIS->getInterval(Reg);
1876  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1877  : MRI.getMaxLaneMaskForVReg(Reg);
1878  VNInfo *V = nullptr;
1879  if (LI.hasSubRanges()) {
1880  for (auto &S : LI.subranges()) {
1881  if ((S.LaneMask & SubLanes) == SubLanes) {
1882  V = S.getVNInfoAt(UseIdx);
1883  break;
1884  }
1885  }
1886  } else {
1887  V = LI.getVNInfoAt(UseIdx);
1888  }
1889  if (!V)
1890  return nullptr;
1891  DefIdx = V->def;
1892  } else {
1893  // Find last def.
1894  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1895  LiveRange &LR = LIS->getRegUnit(*Units);
1896  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1897  if (!DefIdx.isValid() ||
1898  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1899  LIS->getInstructionFromIndex(V->def)))
1900  DefIdx = V->def;
1901  } else {
1902  return nullptr;
1903  }
1904  }
1905  }
1906 
1907  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
1908 
1909  if (!Def || !MDT.dominates(Def, &Use))
1910  return nullptr;
1911 
1912  assert(Def->modifiesRegister(Reg, this));
1913 
1914  return Def;
1915 }
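Finally, a hypothetical caller sketch (names assumed) showing how findReachingDef is typically consumed: walk from a use back to its dominating definition before folding or rewriting it.

   if (MachineInstr *DefMI =
           TRI->findReachingDef(Reg, SubRegIdx, UseMI, MRI, LIS)) {
     // DefMI dominates UseMI and defines (at least the used lanes of) Reg.
   }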