SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
 18 #include "SIMachineFunctionInfo.h"
 19 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 21 #include "llvm/CodeGen/LiveIntervals.h"
 22 #include "llvm/CodeGen/MachineDominators.h"
 23 #include "llvm/CodeGen/MachineFrameInfo.h"
 24 #include "llvm/CodeGen/MachineInstrBuilder.h"
 25 #include "llvm/CodeGen/RegisterScavenging.h"
 26 #include "llvm/CodeGen/SlotIndexes.h"
 27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/LLVMContext.h"
29 
30 using namespace llvm;
31 
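// Helpers for the constructor below: hasPressureSet checks whether a register
// unit's pressure-set list contains PSetID, and classifyPressureSet records in
// a BitVector which pressure sets a representative register contributes to.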
32 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
33  for (unsigned i = 0; PSets[i] != -1; ++i) {
34  if (PSets[i] == (int)PSetID)
35  return true;
36  }
37  return false;
38 }
39 
40 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
41  BitVector &PressureSets) const {
42  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
43  const int *PSets = getRegUnitPressureSets(*U);
44  if (hasPressureSet(PSets, PSetID)) {
45  PressureSets.set(PSetID);
46  break;
47  }
48  }
49 }
50 
 51 static cl::opt<bool> EnableSpillSGPRToSMEM(
 52   "amdgpu-spill-sgpr-to-smem",
 53   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
 54   cl::init(false));
55 
 56 static cl::opt<bool> EnableSpillSGPRToVGPR(
 57   "amdgpu-spill-sgpr-to-vgpr",
 58   cl::desc("Enable spilling VGPRs to SGPRs"),
 59   cl::ReallyHidden,
 60   cl::init(true));
61 
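// The constructor classifies every register pressure set as SGPR, VGPR or
// AGPR (using SGPR0/VGPR0/AGPR0 as representative registers), then picks the
// set with the most register units of each kind as the canonical
// SGPRSetID/VGPRSetID/AGPRSetID.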
 62 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
 63   AMDGPURegisterInfo(),
 64   ST(ST),
65  SGPRPressureSets(getNumRegPressureSets()),
66  VGPRPressureSets(getNumRegPressureSets()),
67  AGPRPressureSets(getNumRegPressureSets()),
68  SpillSGPRToVGPR(false),
69  SpillSGPRToSMEM(false),
70  isWave32(ST.isWave32()) {
71  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
72  SpillSGPRToSMEM = true;
73  else if (EnableSpillSGPRToVGPR)
74  SpillSGPRToVGPR = true;
75 
76  unsigned NumRegPressureSets = getNumRegPressureSets();
77 
78  SGPRSetID = NumRegPressureSets;
79  VGPRSetID = NumRegPressureSets;
80  AGPRSetID = NumRegPressureSets;
81 
82  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
83  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
84  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
85  classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
86  }
87 
88  // Determine the number of reg units for each pressure set.
89  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
90  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
91  const int *PSets = getRegUnitPressureSets(i);
92  for (unsigned j = 0; PSets[j] != -1; ++j) {
93  ++PressureSetRegUnits[PSets[j]];
94  }
95  }
96 
97  unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
98  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
99  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
100  VGPRSetID = i;
101  VGPRMax = PressureSetRegUnits[i];
102  continue;
103  }
104  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
105  SGPRSetID = i;
106  SGPRMax = PressureSetRegUnits[i];
107  }
108  if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
109  AGPRSetID = i;
110  AGPRMax = PressureSetRegUnits[i];
111  continue;
112  }
113  }
114 
115  assert(SGPRSetID < NumRegPressureSets &&
116  VGPRSetID < NumRegPressureSets &&
117  AGPRSetID < NumRegPressureSets);
118 }
119 
 120 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
 121   const MachineFunction &MF) const {
122 
123  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
124  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
125  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
126  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
127 }
128 
129 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
130  unsigned Reg;
131 
132  // Try to place it in a hole after PrivateSegmentBufferReg.
133  if (RegCount & 3) {
134  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
 135  // alignment constraints, so we have a hole where we can put the wave offset.
136  Reg = RegCount - 1;
137  } else {
138  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
139  // wave offset before it.
140  Reg = RegCount - 5;
141  }
142 
143  return Reg;
144 }
145 
 146 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
 147   const MachineFunction &MF) const {
 148   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 149   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
 150   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
151 }
152 
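// Build the set of registers that must never be allocated: special hardware
// registers, SGPRs/VGPRs/AGPRs beyond the subtarget limits, and registers
// already claimed for the scratch descriptor, scratch wave offset, stack and
// frame pointers, and WWM/AGPR spill temporaries.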
 153 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 154   BitVector Reserved(getNumRegs());
155 
 156  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
157  // this seems likely to result in bugs, so I'm marking them as reserved.
158  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
159  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
160 
161  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
162  reserveRegisterTuples(Reserved, AMDGPU::M0);
163 
164  // Reserve src_vccz, src_execz, src_scc.
165  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
166  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
167  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
168 
169  // Reserve the memory aperture registers.
170  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
171  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
172  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
173  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
174 
175  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
176  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
177 
178  // Reserve xnack_mask registers - support is not implemented in Codegen.
179  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
180 
181  // Reserve lds_direct register - support is not implemented in Codegen.
182  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
183 
184  // Reserve Trap Handler registers - support is not implemented in Codegen.
185  reserveRegisterTuples(Reserved, AMDGPU::TBA);
186  reserveRegisterTuples(Reserved, AMDGPU::TMA);
187  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
188  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
189  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
190  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
191  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
192  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
193  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
194  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
195 
196  // Reserve null register - it shall never be allocated
197  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
198 
199  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
200  // will result in bugs.
201  if (isWave32) {
202  Reserved.set(AMDGPU::VCC);
203  Reserved.set(AMDGPU::VCC_HI);
204  }
205 
206  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
207 
208  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
209  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
210  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
211  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
212  reserveRegisterTuples(Reserved, Reg);
213  }
214 
215  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
216  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
217  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
218  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
219  reserveRegisterTuples(Reserved, Reg);
220  Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
221  reserveRegisterTuples(Reserved, Reg);
222  }
223 
 224  // Reserve all the remaining AGPRs if there are no instructions to use them.
225  if (!ST.hasMAIInsts()) {
226  for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
227  unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
228  reserveRegisterTuples(Reserved, Reg);
229  }
230  }
231 
 232   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 233 
234  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
235  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
236  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
237  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
238  }
239 
240  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
241  if (ScratchRSrcReg != AMDGPU::NoRegister) {
242  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
243  // to spill.
244  // TODO: May need to reserve a VGPR if doing LDS spilling.
245  reserveRegisterTuples(Reserved, ScratchRSrcReg);
246  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
247  }
248 
249  // We have to assume the SP is needed in case there are calls in the function,
250  // which is detected after the function is lowered. If we aren't really going
251  // to need SP, don't bother reserving it.
252  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
253 
254  if (StackPtrReg != AMDGPU::NoRegister) {
255  reserveRegisterTuples(Reserved, StackPtrReg);
256  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
257  }
258 
259  unsigned FrameReg = MFI->getFrameOffsetReg();
260  if (FrameReg != AMDGPU::NoRegister) {
261  reserveRegisterTuples(Reserved, FrameReg);
262  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
263  }
264 
265  for (unsigned Reg : MFI->WWMReservedRegs) {
266  reserveRegisterTuples(Reserved, Reg);
267  }
268 
269  // FIXME: Stop using reserved registers for this.
270  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
271  reserveRegisterTuples(Reserved, Reg);
272 
273  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
274  reserveRegisterTuples(Reserved, Reg);
275 
276  return Reserved;
277 }
278 
 279 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
 280   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 281   // On entry, the base address is 0, so it can't possibly need any more
282  // alignment.
283 
284  // FIXME: Should be able to specify the entry frame alignment per calling
285  // convention instead.
286  if (Info->isEntryFunction())
287  return false;
288 
 289   return TargetRegisterInfo::canRealignStack(MF);
 290 }
291 
 292 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
 293   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
 294   if (Info->isEntryFunction()) {
295  const MachineFrameInfo &MFI = Fn.getFrameInfo();
296  return MFI.hasStackObjects() || MFI.hasCalls();
297  }
298 
299  // May need scavenger for dealing with callee saved registers.
300  return true;
301 }
302 
 303 bool SIRegisterInfo::requiresFrameIndexScavenging(
 304   const MachineFunction &MF) const {
305  // Do not use frame virtual registers. They used to be used for SGPRs, but
306  // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
307  // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
308  // spill.
309  return false;
310 }
311 
 312 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
 313   const MachineFunction &MF) const {
314  const MachineFrameInfo &MFI = MF.getFrameInfo();
315  return MFI.hasStackObjects();
316 }
317 
 318 bool SIRegisterInfo::requiresVirtualBaseRegisters(
 319   const MachineFunction &) const {
320  // There are no special dedicated stack or frame pointers.
321  return true;
322 }
323 
 324 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
 325   // This helps catch bugs as verifier errors.
326  return true;
327 }
328 
 329 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
 330   assert(SIInstrInfo::isMUBUF(*MI));
 331 
332  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
333  AMDGPU::OpName::offset);
334  return MI->getOperand(OffIdx).getImm();
335 }
336 
 337 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
 338                                                  int Idx) const {
339  if (!SIInstrInfo::isMUBUF(*MI))
340  return 0;
341 
 342   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
 343                                            AMDGPU::OpName::vaddr) &&
344  "Should never see frame index on non-address operand");
345 
346  return getMUBUFInstrOffset(MI);
347 }
348 
 349 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
 350   if (!MI->mayLoadOrStore())
351  return false;
352 
353  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
354 
355  return !isUInt<12>(FullOffset);
356 }
357 
 358 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 359                                                   unsigned BaseReg,
360  int FrameIdx,
361  int64_t Offset) const {
 362   MachineBasicBlock::iterator Ins = MBB->begin();
 363   DebugLoc DL; // Defaults to "unknown"
364 
365  if (Ins != MBB->end())
366  DL = Ins->getDebugLoc();
367 
368  MachineFunction *MF = MBB->getParent();
369  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
370  const SIInstrInfo *TII = Subtarget.getInstrInfo();
371 
372  if (Offset == 0) {
373  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
374  .addFrameIndex(FrameIdx);
375  return;
376  }
377 
 378   MachineRegisterInfo &MRI = MF->getRegInfo();
 379   Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
380 
381  Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
382 
383  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
384  .addImm(Offset);
385  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
386  .addFrameIndex(FrameIdx);
387 
388  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
389  .addReg(OffsetReg, RegState::Kill)
390  .addReg(FIReg)
391  .addImm(0); // clamp bit
392 }
393 
 394 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
 395                                        int64_t Offset) const {
396 
397  MachineBasicBlock *MBB = MI.getParent();
398  MachineFunction *MF = MBB->getParent();
399  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
400  const SIInstrInfo *TII = Subtarget.getInstrInfo();
401 
402 #ifndef NDEBUG
403  // FIXME: Is it possible to be storing a frame index to itself?
404  bool SeenFI = false;
405  for (const MachineOperand &MO: MI.operands()) {
406  if (MO.isFI()) {
407  if (SeenFI)
408  llvm_unreachable("should not see multiple frame indices");
409 
410  SeenFI = true;
411  }
412  }
413 #endif
414 
415  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
416  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
417  assert(TII->isMUBUF(MI));
418  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
419  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
420  "should only be seeing frame offset relative FrameIndex");
421 
422 
423  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
424  int64_t NewOffset = OffsetOp->getImm() + Offset;
425  assert(isUInt<12>(NewOffset) && "offset should be legal");
426 
427  FIOp->ChangeToRegister(BaseReg, false);
428  OffsetOp->setImm(NewOffset);
429 }
430 
 431 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
 432                                         unsigned BaseReg,
433  int64_t Offset) const {
434  if (!SIInstrInfo::isMUBUF(*MI))
435  return false;
436 
437  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
438 
439  return isUInt<12>(NewOffset);
440 }
441 
 442 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
 443   const MachineFunction &MF, unsigned Kind) const {
444  // This is inaccurate. It depends on the instruction and address space. The
445  // only place where we should hit this is for dealing with frame indexes /
446  // private accesses, so this is correct in that case.
447  return &AMDGPU::VGPR_32RegClass;
448 }
449 
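// Map a spill pseudo-instruction to the number of 32-bit subregisters it
// covers (e.g. SI_SPILL_S128_SAVE spills four dwords).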
450 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
451 
452  switch (Op) {
453  case AMDGPU::SI_SPILL_S1024_SAVE:
454  case AMDGPU::SI_SPILL_S1024_RESTORE:
455  case AMDGPU::SI_SPILL_V1024_SAVE:
456  case AMDGPU::SI_SPILL_V1024_RESTORE:
457  case AMDGPU::SI_SPILL_A1024_SAVE:
458  case AMDGPU::SI_SPILL_A1024_RESTORE:
459  return 32;
460  case AMDGPU::SI_SPILL_S512_SAVE:
461  case AMDGPU::SI_SPILL_S512_RESTORE:
462  case AMDGPU::SI_SPILL_V512_SAVE:
463  case AMDGPU::SI_SPILL_V512_RESTORE:
464  case AMDGPU::SI_SPILL_A512_SAVE:
465  case AMDGPU::SI_SPILL_A512_RESTORE:
466  return 16;
467  case AMDGPU::SI_SPILL_S256_SAVE:
468  case AMDGPU::SI_SPILL_S256_RESTORE:
469  case AMDGPU::SI_SPILL_V256_SAVE:
470  case AMDGPU::SI_SPILL_V256_RESTORE:
471  return 8;
472  case AMDGPU::SI_SPILL_S160_SAVE:
473  case AMDGPU::SI_SPILL_S160_RESTORE:
474  case AMDGPU::SI_SPILL_V160_SAVE:
475  case AMDGPU::SI_SPILL_V160_RESTORE:
476  return 5;
477  case AMDGPU::SI_SPILL_S128_SAVE:
478  case AMDGPU::SI_SPILL_S128_RESTORE:
479  case AMDGPU::SI_SPILL_V128_SAVE:
480  case AMDGPU::SI_SPILL_V128_RESTORE:
481  case AMDGPU::SI_SPILL_A128_SAVE:
482  case AMDGPU::SI_SPILL_A128_RESTORE:
483  return 4;
484  case AMDGPU::SI_SPILL_S96_SAVE:
485  case AMDGPU::SI_SPILL_S96_RESTORE:
486  case AMDGPU::SI_SPILL_V96_SAVE:
487  case AMDGPU::SI_SPILL_V96_RESTORE:
488  return 3;
489  case AMDGPU::SI_SPILL_S64_SAVE:
490  case AMDGPU::SI_SPILL_S64_RESTORE:
491  case AMDGPU::SI_SPILL_V64_SAVE:
492  case AMDGPU::SI_SPILL_V64_RESTORE:
493  case AMDGPU::SI_SPILL_A64_SAVE:
494  case AMDGPU::SI_SPILL_A64_RESTORE:
495  return 2;
496  case AMDGPU::SI_SPILL_S32_SAVE:
497  case AMDGPU::SI_SPILL_S32_RESTORE:
498  case AMDGPU::SI_SPILL_V32_SAVE:
499  case AMDGPU::SI_SPILL_V32_RESTORE:
500  case AMDGPU::SI_SPILL_A32_SAVE:
501  case AMDGPU::SI_SPILL_A32_RESTORE:
502  return 1;
503  default: llvm_unreachable("Invalid spill opcode");
504  }
505 }
506 
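// Map an OFFEN (VGPR-addressed) MUBUF store opcode to its OFFSET form, which
// takes no VGPR address, or return -1 if there is no such variant. The
// following function does the same for MUBUF loads.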
507 static int getOffsetMUBUFStore(unsigned Opc) {
508  switch (Opc) {
509  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
510  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
511  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
512  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
513  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
514  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
515  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
516  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
517  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
518  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
519  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
520  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
521  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
522  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
523  default:
524  return -1;
525  }
526 }
527 
528 static int getOffsetMUBUFLoad(unsigned Opc) {
529  switch (Opc) {
530  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
531  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
532  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
533  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
534  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
535  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
536  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
537  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
538  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
539  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
540  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
541  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
542  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
543  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
544  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
545  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
546  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
547  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
548  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
549  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
550  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
551  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
552  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
553  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
554  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
555  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
556  default:
557  return -1;
558  }
559 }
560 
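// If this frame index/lane was assigned an AGPR (or a VGPR, when spilling an
// AGPR), emit a single V_ACCVGPR_WRITE/READ copy instead of a memory access.
// Returns a null MachineInstrBuilder if no register was assigned.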
 561 static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
 562                                            int Index,
563  unsigned Lane,
564  unsigned ValueReg,
565  bool IsKill) {
566  MachineBasicBlock *MBB = MI->getParent();
567  MachineFunction *MF = MI->getParent()->getParent();
 568   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 569   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
570  const SIInstrInfo *TII = ST.getInstrInfo();
571 
572  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
573 
574  if (Reg == AMDGPU::NoRegister)
575  return MachineInstrBuilder();
576 
577  bool IsStore = MI->mayStore();
 578   MachineRegisterInfo &MRI = MF->getRegInfo();
 579   auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
580 
581  unsigned Dst = IsStore ? Reg : ValueReg;
582  unsigned Src = IsStore ? ValueReg : Reg;
583  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
584  : AMDGPU::V_ACCVGPR_READ_B32;
585 
586  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
587  .addReg(Src, getKillRegState(IsKill));
588 }
589 
590 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
591 // need to handle the case where an SGPR may need to be spilled while spilling.
 592 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
 593                                       MachineFrameInfo &MFI,
 594                                       MachineBasicBlock::iterator MI,
 595                                       int Index,
 596                                       int64_t Offset) {
597  MachineBasicBlock *MBB = MI->getParent();
598  const DebugLoc &DL = MI->getDebugLoc();
599  bool IsStore = MI->mayStore();
600 
601  unsigned Opc = MI->getOpcode();
 602   int LoadStoreOp = IsStore ?
 603     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
604  if (LoadStoreOp == -1)
605  return false;
606 
607  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
608  if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
609  return true;
610 
611  MachineInstrBuilder NewMI =
612  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
613  .add(*Reg)
614  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
615  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
616  .addImm(Offset)
617  .addImm(0) // glc
618  .addImm(0) // slc
619  .addImm(0) // tfe
620  .addImm(0) // dlc
621  .cloneMemRefs(*MI);
622 
623  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
624  AMDGPU::OpName::vdata_in);
625  if (VDataIn)
626  NewMI.add(*VDataIn);
627  return true;
628 }
629 
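// Expand a VGPR/AGPR spill or reload into one MUBUF access per 32-bit
// subregister. If the immediate offset does not fit in 12 bits, the scaled
// offset is added into a scavenged SGPR, or temporarily added to the scratch
// offset register and subtracted again after the last access.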
630 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
631  unsigned LoadStoreOp,
632  int Index,
633  unsigned ValueReg,
634  bool IsKill,
635  unsigned ScratchRsrcReg,
636  unsigned ScratchOffsetReg,
637  int64_t InstOffset,
638  MachineMemOperand *MMO,
639  RegScavenger *RS) const {
640  MachineBasicBlock *MBB = MI->getParent();
641  MachineFunction *MF = MI->getParent()->getParent();
642  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
643  const SIInstrInfo *TII = ST.getInstrInfo();
644  const MachineFrameInfo &MFI = MF->getFrameInfo();
645 
646  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
647  const DebugLoc &DL = MI->getDebugLoc();
648  bool IsStore = Desc.mayStore();
649 
650  bool Scavenged = false;
651  unsigned SOffset = ScratchOffsetReg;
652 
653  const unsigned EltSize = 4;
654  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
655  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
656  unsigned Size = NumSubRegs * EltSize;
657  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
658  int64_t ScratchOffsetRegDelta = 0;
659 
660  unsigned Align = MFI.getObjectAlignment(Index);
661  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
662 
663  Register TmpReg =
664  hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
665  : Register();
666 
667  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
668 
669  if (!isUInt<12>(Offset + Size - EltSize)) {
670  SOffset = AMDGPU::NoRegister;
671 
672  // We currently only support spilling VGPRs to EltSize boundaries, meaning
673  // we can simplify the adjustment of Offset here to just scale with
674  // WavefrontSize.
675  Offset *= ST.getWavefrontSize();
676 
677  // We don't have access to the register scavenger if this function is called
678  // during PEI::scavengeFrameVirtualRegs().
679  if (RS)
680  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
681 
682  if (SOffset == AMDGPU::NoRegister) {
 683  // There are no free SGPRs, and we are in the process of spilling
 684  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
 685  // on SI/CI, and on VI it is true until we implement spilling using scalar
 686  // stores), we have no way to free up an SGPR. Our solution here is to
 687  // add the offset directly to the ScratchOffset register, and then
 688  // subtract the offset after the spill to return ScratchOffset to its
 689  // original value.
690  SOffset = ScratchOffsetReg;
691  ScratchOffsetRegDelta = Offset;
692  } else {
693  Scavenged = true;
694  }
695 
696  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
697  .addReg(ScratchOffsetReg)
698  .addImm(Offset);
699 
700  Offset = 0;
701  }
702 
703  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
704  Register SubReg = NumSubRegs == 1
705  ? Register(ValueReg)
706  : getSubReg(ValueReg, getSubRegFromChannel(i));
707 
708  unsigned SOffsetRegState = 0;
709  unsigned SrcDstRegState = getDefRegState(!IsStore);
710  if (i + 1 == e) {
711  SOffsetRegState |= getKillRegState(Scavenged);
712  // The last implicit use carries the "Kill" flag.
713  SrcDstRegState |= getKillRegState(IsKill);
714  }
715 
716  auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
717 
718  if (!MIB.getInstr()) {
719  unsigned FinalReg = SubReg;
720  if (TmpReg != AMDGPU::NoRegister) {
721  if (IsStore)
722  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
723  .addReg(SubReg, getKillRegState(IsKill));
724  SubReg = TmpReg;
725  }
726 
727  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
728  MachineMemOperand *NewMMO
729  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
730  EltSize, MinAlign(Align, EltSize * i));
731 
732  MIB = BuildMI(*MBB, MI, DL, Desc)
733  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
734  .addReg(ScratchRsrcReg)
735  .addReg(SOffset, SOffsetRegState)
736  .addImm(Offset)
737  .addImm(0) // glc
738  .addImm(0) // slc
739  .addImm(0) // tfe
740  .addImm(0) // dlc
741  .addMemOperand(NewMMO);
742 
743  if (!IsStore && TmpReg != AMDGPU::NoRegister)
744  MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
745  FinalReg)
746  .addReg(TmpReg, RegState::Kill);
747  }
748 
749  if (NumSubRegs > 1)
750  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
751  }
752 
753  if (ScratchOffsetRegDelta != 0) {
754  // Subtract the offset we added to the ScratchOffset register.
755  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
756  .addReg(ScratchOffsetReg)
757  .addImm(ScratchOffsetRegDelta);
758  }
759 }
760 
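// Pick the widest scalar buffer load/store (4, 2 or 1 dwords) that evenly
// divides the register being spilled to or restored from SMEM.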
761 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
762  bool Store) {
763  if (SuperRegSize % 16 == 0) {
764  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
765  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
766  }
767 
768  if (SuperRegSize % 8 == 0) {
769  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
770  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
771  }
772 
773  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
774  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
775 }
776 
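// Spill an SGPR either with scalar stores to SMEM, into lanes of a reserved
// VGPR via V_WRITELANE_B32, or, as a last resort, through a scavenged VGPR
// into a real stack slot.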
 777 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 778                                int Index,
779  RegScavenger *RS,
780  bool OnlyToVGPR) const {
781  MachineBasicBlock *MBB = MI->getParent();
782  MachineFunction *MF = MBB->getParent();
 783   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 784   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
 785 
 786   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
 787     = MFI->getSGPRToVGPRSpills(Index);
788  bool SpillToVGPR = !VGPRSpills.empty();
789  if (OnlyToVGPR && !SpillToVGPR)
790  return false;
791 
792  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
793  const SIInstrInfo *TII = ST.getInstrInfo();
794 
795  Register SuperReg = MI->getOperand(0).getReg();
796  bool IsKill = MI->getOperand(0).isKill();
797  const DebugLoc &DL = MI->getDebugLoc();
798 
799  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
800 
801  bool SpillToSMEM = spillSGPRToSMEM();
802  if (SpillToSMEM && OnlyToVGPR)
803  return false;
804 
805  Register FrameReg = getFrameRegister(*MF);
806 
807  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
808  SuperReg != MFI->getFrameOffsetReg() &&
809  SuperReg != MFI->getScratchWaveOffsetReg()));
810 
811  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
812 
813  unsigned OffsetReg = AMDGPU::M0;
814  unsigned M0CopyReg = AMDGPU::NoRegister;
815 
816  if (SpillToSMEM) {
817  if (RS->isRegUsed(AMDGPU::M0)) {
818  M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
819  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
820  .addReg(AMDGPU::M0);
821  }
822  }
823 
824  unsigned ScalarStoreOp;
825  unsigned EltSize = 4;
826  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
827  if (SpillToSMEM && isSGPRClass(RC)) {
828  // XXX - if private_element_size is larger than 4 it might be useful to be
829  // able to spill wider vmem spills.
830  std::tie(EltSize, ScalarStoreOp) =
831  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
832  }
833 
834  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
835  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
836 
837  // Scavenged temporary VGPR to use. It must be scavenged once for any number
838  // of spilled subregs.
839  Register TmpVGPR;
840 
841  // SubReg carries the "Kill" flag when SubReg == SuperReg.
842  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
843  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
844  Register SubReg =
845  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
846 
847  if (SpillToSMEM) {
848  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
849 
850  // The allocated memory size is really the wavefront size * the frame
851  // index size. The widest register class is 64 bytes, so a 4-byte scratch
852  // allocation is enough to spill this in a single stack object.
853  //
854  // FIXME: Frame size/offsets are computed earlier than this, so the extra
855  // space is still unnecessarily allocated.
856 
857  unsigned Align = FrameInfo.getObjectAlignment(Index);
858  MachinePointerInfo PtrInfo
859  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 860   MachineMemOperand *MMO
 861     = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
 862                                EltSize, MinAlign(Align, EltSize * i));
863 
864  // SMEM instructions only support a single offset, so increment the wave
865  // offset.
866 
867  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
868  if (Offset != 0) {
869  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
870  .addReg(FrameReg)
871  .addImm(Offset);
872  } else {
873  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
874  .addReg(FrameReg);
875  }
876 
877  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
878  .addReg(SubReg, getKillRegState(IsKill)) // sdata
879  .addReg(MFI->getScratchRSrcReg()) // sbase
880  .addReg(OffsetReg, RegState::Kill) // soff
881  .addImm(0) // glc
882  .addImm(0) // dlc
883  .addMemOperand(MMO);
884 
885  continue;
886  }
887 
888  if (SpillToVGPR) {
889  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
890 
891  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
892  // only circumstance in which we say it is undefined is when it is the
893  // first spill to this VGPR in the first basic block.
894  bool VGPRDefined = true;
895  if (MBB == &MF->front())
896  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
897 
898  // Mark the "old value of vgpr" input undef only if this is the first sgpr
899  // spill to this specific vgpr in the first basic block.
900  BuildMI(*MBB, MI, DL,
901  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
902  Spill.VGPR)
903  .addReg(SubReg, getKillRegState(IsKill))
904  .addImm(Spill.Lane)
905  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
906 
907  // FIXME: Since this spills to another register instead of an actual
908  // frame index, we should delete the frame index when all references to
909  // it are fixed.
910  } else {
 911  // XXX - Can the spill to VGPR fail for some subregisters but not others?
912  if (OnlyToVGPR)
913  return false;
914 
915  // Spill SGPR to a frame index.
916  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
917  if (!TmpVGPR.isValid())
918  TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
919  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
920 
 921   MachineInstrBuilder Mov
 922     = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
923  .addReg(SubReg, SubKillState);
924 
925  // There could be undef components of a spilled super register.
926  // TODO: Can we detect this and skip the spill?
927  if (NumSubRegs > 1) {
928  // The last implicit use of the SuperReg carries the "Kill" flag.
929  unsigned SuperKillState = 0;
930  if (i + 1 == e)
931  SuperKillState |= getKillRegState(IsKill);
932  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
933  }
934 
935  unsigned Align = FrameInfo.getObjectAlignment(Index);
936  MachinePointerInfo PtrInfo
937  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 938   MachineMemOperand *MMO
 939     = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
 940                                EltSize, MinAlign(Align, EltSize * i));
941  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
942  .addReg(TmpVGPR, RegState::Kill) // src
943  .addFrameIndex(Index) // vaddr
 944     .addReg(MFI->getScratchRSrcReg()) // srsrc
945  .addReg(MFI->getStackPtrOffsetReg()) // soffset
946  .addImm(i * 4) // offset
947  .addMemOperand(MMO);
948  }
949  }
950 
951  if (M0CopyReg != AMDGPU::NoRegister) {
952  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
953  .addReg(M0CopyReg, RegState::Kill);
954  }
955 
956  MI->eraseFromParent();
957  MFI->addToSpilledSGPRs(NumSubRegs);
958  return true;
959 }
960 
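// Restore an SGPR using the inverse of the paths in spillSGPR: scalar loads
// from SMEM, V_READLANE_B32 from the spill VGPR, or a VGPR reload followed by
// V_READFIRSTLANE_B32.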
 961 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
 962                                  int Index,
963  RegScavenger *RS,
964  bool OnlyToVGPR) const {
965  MachineFunction *MF = MI->getParent()->getParent();
966  MachineBasicBlock *MBB = MI->getParent();
 967   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 968 
 969   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
 970     = MFI->getSGPRToVGPRSpills(Index);
971  bool SpillToVGPR = !VGPRSpills.empty();
972  if (OnlyToVGPR && !SpillToVGPR)
973  return false;
974 
975  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
976  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
977  const SIInstrInfo *TII = ST.getInstrInfo();
978  const DebugLoc &DL = MI->getDebugLoc();
979 
980  Register SuperReg = MI->getOperand(0).getReg();
981  bool SpillToSMEM = spillSGPRToSMEM();
982  if (SpillToSMEM && OnlyToVGPR)
983  return false;
984 
985  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
986 
987  unsigned OffsetReg = AMDGPU::M0;
988  unsigned M0CopyReg = AMDGPU::NoRegister;
989 
990  if (SpillToSMEM) {
991  if (RS->isRegUsed(AMDGPU::M0)) {
992  M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
993  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
994  .addReg(AMDGPU::M0);
995  }
996  }
997 
998  unsigned EltSize = 4;
999  unsigned ScalarLoadOp;
1000 
1001  Register FrameReg = getFrameRegister(*MF);
1002 
1003  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
1004  if (SpillToSMEM && isSGPRClass(RC)) {
1005  // XXX - if private_element_size is larger than 4 it might be useful to be
1006  // able to spill wider vmem spills.
1007  std::tie(EltSize, ScalarLoadOp) =
1008  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
1009  }
1010 
1011  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
1012  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
1013 
1014  // SubReg carries the "Kill" flag when SubReg == SuperReg.
1015  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
1016 
1017  Register TmpVGPR;
1018 
1019  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
1020  Register SubReg =
1021  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
1022 
1023  if (SpillToSMEM) {
1024  // FIXME: Size may be > 4 but extra bytes wasted.
1025  unsigned Align = FrameInfo.getObjectAlignment(Index);
1026  MachinePointerInfo PtrInfo
1027  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 1028   MachineMemOperand *MMO
 1029     = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
 1030                                EltSize, MinAlign(Align, EltSize * i));
1031 
1032  // Add i * 4 offset
1033  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
1034  if (Offset != 0) {
1035  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
1036  .addReg(FrameReg)
1037  .addImm(Offset);
1038  } else {
1039  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
1040  .addReg(FrameReg);
1041  }
1042 
1043  auto MIB =
1044  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
1045  .addReg(MFI->getScratchRSrcReg()) // sbase
1046  .addReg(OffsetReg, RegState::Kill) // soff
1047  .addImm(0) // glc
1048  .addImm(0) // dlc
1049  .addMemOperand(MMO);
1050 
1051  if (NumSubRegs > 1 && i == 0)
1052  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1053 
1054  continue;
1055  }
1056 
1057  if (SpillToVGPR) {
1058  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1059  auto MIB =
1060  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
1061  SubReg)
1062  .addReg(Spill.VGPR)
1063  .addImm(Spill.Lane);
1064 
1065  if (NumSubRegs > 1 && i == 0)
1066  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1067  } else {
1068  if (OnlyToVGPR)
1069  return false;
1070 
1071  // Restore SGPR from a stack slot.
1072  // FIXME: We should use S_LOAD_DWORD here for VI.
1073  if (!TmpVGPR.isValid())
1074  TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1075  unsigned Align = FrameInfo.getObjectAlignment(Index);
1076 
1077  MachinePointerInfo PtrInfo
1078  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1079 
1080  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
1081  MachineMemOperand::MOLoad, EltSize,
1082  MinAlign(Align, EltSize * i));
1083 
1084  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
1085  .addFrameIndex(Index) // vaddr
1086  .addReg(MFI->getScratchRSrcReg()) // srsrc
1087  .addReg(MFI->getStackPtrOffsetReg()) // soffset
1088  .addImm(i * 4) // offset
1089  .addMemOperand(MMO);
1090 
1091  auto MIB =
1092  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
1093  .addReg(TmpVGPR, RegState::Kill);
1094 
1095  if (NumSubRegs > 1)
1096  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
1097  }
1098  }
1099 
1100  if (M0CopyReg != AMDGPU::NoRegister) {
1101  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
1102  .addReg(M0CopyReg, RegState::Kill);
1103  }
1104 
1105  MI->eraseFromParent();
1106  return true;
1107 }
1108 
1109 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1110 /// a VGPR and the stack slot can be safely eliminated when all other users are
1111 /// handled.
 1112 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
 1113   MachineBasicBlock::iterator MI,
 1114   int FI,
1115  RegScavenger *RS) const {
1116  switch (MI->getOpcode()) {
1117  case AMDGPU::SI_SPILL_S1024_SAVE:
1118  case AMDGPU::SI_SPILL_S512_SAVE:
1119  case AMDGPU::SI_SPILL_S256_SAVE:
1120  case AMDGPU::SI_SPILL_S160_SAVE:
1121  case AMDGPU::SI_SPILL_S128_SAVE:
1122  case AMDGPU::SI_SPILL_S96_SAVE:
1123  case AMDGPU::SI_SPILL_S64_SAVE:
1124  case AMDGPU::SI_SPILL_S32_SAVE:
1125  return spillSGPR(MI, FI, RS, true);
1126  case AMDGPU::SI_SPILL_S1024_RESTORE:
1127  case AMDGPU::SI_SPILL_S512_RESTORE:
1128  case AMDGPU::SI_SPILL_S256_RESTORE:
1129  case AMDGPU::SI_SPILL_S160_RESTORE:
1130  case AMDGPU::SI_SPILL_S128_RESTORE:
1131  case AMDGPU::SI_SPILL_S96_RESTORE:
1132  case AMDGPU::SI_SPILL_S64_RESTORE:
1133  case AMDGPU::SI_SPILL_S32_RESTORE:
1134  return restoreSGPR(MI, FI, RS, true);
1135  default:
1136  llvm_unreachable("not an SGPR spill instruction");
1137  }
1138 }
1139 
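// Rewrite a frame index operand into something the hardware can address:
// SGPR and VGPR/AGPR spill pseudos are expanded here, MUBUF accesses get the
// frame register plus a folded immediate offset, and remaining uses are
// materialized as an offset from the scratch wave base, scaled by the
// wavefront size when outside an entry function.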
 1140 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 1141                                         int SPAdj, unsigned FIOperandNum,
1142  RegScavenger *RS) const {
1143  MachineFunction *MF = MI->getParent()->getParent();
1144  MachineBasicBlock *MBB = MI->getParent();
 1145   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 1146   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1147  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1148  const SIInstrInfo *TII = ST.getInstrInfo();
1149  DebugLoc DL = MI->getDebugLoc();
1150 
1151  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1152 
1153  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1154  int Index = MI->getOperand(FIOperandNum).getIndex();
1155 
1156  Register FrameReg = getFrameRegister(*MF);
1157 
1158  switch (MI->getOpcode()) {
1159  // SGPR register spill
1160  case AMDGPU::SI_SPILL_S1024_SAVE:
1161  case AMDGPU::SI_SPILL_S512_SAVE:
1162  case AMDGPU::SI_SPILL_S256_SAVE:
1163  case AMDGPU::SI_SPILL_S160_SAVE:
1164  case AMDGPU::SI_SPILL_S128_SAVE:
1165  case AMDGPU::SI_SPILL_S96_SAVE:
1166  case AMDGPU::SI_SPILL_S64_SAVE:
1167  case AMDGPU::SI_SPILL_S32_SAVE: {
1168  spillSGPR(MI, Index, RS);
1169  break;
1170  }
1171 
1172  // SGPR register restore
1173  case AMDGPU::SI_SPILL_S1024_RESTORE:
1174  case AMDGPU::SI_SPILL_S512_RESTORE:
1175  case AMDGPU::SI_SPILL_S256_RESTORE:
1176  case AMDGPU::SI_SPILL_S160_RESTORE:
1177  case AMDGPU::SI_SPILL_S128_RESTORE:
1178  case AMDGPU::SI_SPILL_S96_RESTORE:
1179  case AMDGPU::SI_SPILL_S64_RESTORE:
1180  case AMDGPU::SI_SPILL_S32_RESTORE: {
1181  restoreSGPR(MI, Index, RS);
1182  break;
1183  }
1184 
1185  // VGPR register spill
1186  case AMDGPU::SI_SPILL_V1024_SAVE:
1187  case AMDGPU::SI_SPILL_V512_SAVE:
1188  case AMDGPU::SI_SPILL_V256_SAVE:
1189  case AMDGPU::SI_SPILL_V160_SAVE:
1190  case AMDGPU::SI_SPILL_V128_SAVE:
1191  case AMDGPU::SI_SPILL_V96_SAVE:
1192  case AMDGPU::SI_SPILL_V64_SAVE:
1193  case AMDGPU::SI_SPILL_V32_SAVE:
1194  case AMDGPU::SI_SPILL_A1024_SAVE:
1195  case AMDGPU::SI_SPILL_A512_SAVE:
1196  case AMDGPU::SI_SPILL_A128_SAVE:
1197  case AMDGPU::SI_SPILL_A64_SAVE:
1198  case AMDGPU::SI_SPILL_A32_SAVE: {
1199  const MachineOperand *VData = TII->getNamedOperand(*MI,
1200  AMDGPU::OpName::vdata);
1201  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1202  MFI->getStackPtrOffsetReg());
1203 
1204  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1205  Index,
1206  VData->getReg(), VData->isKill(),
1207  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1208  FrameReg,
1209  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1210  *MI->memoperands_begin(),
1211  RS);
1212  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1213  MI->eraseFromParent();
1214  break;
1215  }
1216  case AMDGPU::SI_SPILL_V32_RESTORE:
1217  case AMDGPU::SI_SPILL_V64_RESTORE:
1218  case AMDGPU::SI_SPILL_V96_RESTORE:
1219  case AMDGPU::SI_SPILL_V128_RESTORE:
1220  case AMDGPU::SI_SPILL_V160_RESTORE:
1221  case AMDGPU::SI_SPILL_V256_RESTORE:
1222  case AMDGPU::SI_SPILL_V512_RESTORE:
1223  case AMDGPU::SI_SPILL_V1024_RESTORE:
1224  case AMDGPU::SI_SPILL_A32_RESTORE:
1225  case AMDGPU::SI_SPILL_A64_RESTORE:
1226  case AMDGPU::SI_SPILL_A128_RESTORE:
1227  case AMDGPU::SI_SPILL_A512_RESTORE:
1228  case AMDGPU::SI_SPILL_A1024_RESTORE: {
1229  const MachineOperand *VData = TII->getNamedOperand(*MI,
1230  AMDGPU::OpName::vdata);
1231  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1232  MFI->getStackPtrOffsetReg());
1233 
1234  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1235  Index,
1236  VData->getReg(), VData->isKill(),
1237  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1238  FrameReg,
1239  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1240  *MI->memoperands_begin(),
1241  RS);
1242  MI->eraseFromParent();
1243  break;
1244  }
1245 
1246  default: {
1247  const DebugLoc &DL = MI->getDebugLoc();
1248  bool IsMUBUF = TII->isMUBUF(*MI);
1249 
1250  if (!IsMUBUF && !MFI->isEntryFunction()) {
1251  // Convert to an absolute stack address by finding the offset from the
1252  // scratch wave base and scaling by the wave size.
1253  //
1254  // In an entry function/kernel the offset is already the absolute
1255  // address relative to the frame register.
1256 
1257  Register TmpDiffReg =
1258  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1259 
1260  // If there's no free SGPR, in-place modify the FP
1261  Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
1262 
1263  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1264  Register ResultReg = IsCopy ?
1265  MI->getOperand(0).getReg() :
1266  RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1267 
1268  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
 1269     .addReg(FrameReg)
 1270     .addReg(MFI->getScratchWaveOffsetReg());
 1271 
1272  int64_t Offset = FrameInfo.getObjectOffset(Index);
1273  if (Offset == 0) {
1274  // XXX - This never happens because of emergency scavenging slot at 0?
1275  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1276  .addImm(ST.getWavefrontSizeLog2())
1277  .addReg(DiffReg);
1278  } else {
1279  if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
1280  Register ScaledReg =
1281  RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0);
1282 
1283  BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
1284  ScaledReg)
1285  .addImm(ST.getWavefrontSizeLog2())
1286  .addReg(DiffReg, RegState::Kill);
1287 
1288  const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
1289 
1290  // TODO: Fold if use instruction is another add of a constant.
1291  if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1292  // FIXME: This can fail
1293  MIB.addImm(Offset);
1294  MIB.addReg(ScaledReg, RegState::Kill);
1295  if (!IsVOP2)
1296  MIB.addImm(0); // clamp bit
1297  } else {
1298  Register ConstOffsetReg =
1299  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false);
1300 
1301  // This should always be able to use the unused carry out.
1302  assert(ConstOffsetReg && "this scavenge should not be able to fail");
1303 
1304  BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1305  .addImm(Offset);
1306  MIB.addReg(ConstOffsetReg, RegState::Kill);
1307  MIB.addReg(ScaledReg, RegState::Kill);
1308  MIB.addImm(0); // clamp bit
1309  }
1310  } else {
 1311  // We have to produce a carry out, and there isn't a free SGPR
1312  // pair for it. We can keep the whole computation on the SALU to
1313  // avoid clobbering an additional register at the cost of an extra
1314  // mov.
1315 
1316  // We may have 1 free scratch SGPR even though a carry out is
1317  // unavailable. Only one additional mov is needed.
1318  Register TmpScaledReg =
1319  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1320  Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg;
1321 
1322  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
 1323     .addReg(DiffReg, RegState::Kill)
 1324     .addImm(ST.getWavefrontSizeLog2());
1325  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
1326  .addReg(ScaledReg, RegState::Kill)
1327  .addImm(Offset);
1328  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
1329  .addReg(ScaledReg, RegState::Kill);
1330 
1331  // If there were truly no free SGPRs, we need to undo everything.
1332  if (!TmpScaledReg.isValid()) {
1333  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
1334  .addReg(ScaledReg, RegState::Kill)
1335  .addImm(Offset);
1336  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
 1337     .addReg(DiffReg, RegState::Kill)
 1338     .addImm(ST.getWavefrontSizeLog2());
1339  }
1340  }
1341  }
1342 
1343  if (!TmpDiffReg.isValid()) {
1344  // Restore the FP.
1345  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
 1346     .addReg(FrameReg)
 1347     .addReg(MFI->getScratchWaveOffsetReg());
1348  }
1349 
1350  // Don't introduce an extra copy if we're just materializing in a mov.
1351  if (IsCopy)
1352  MI->eraseFromParent();
1353  else
1354  FIOp.ChangeToRegister(ResultReg, false, false, true);
1355  return;
1356  }
1357 
1358  if (IsMUBUF) {
1359  // Disable offen so we don't need a 0 vgpr base.
1360  assert(static_cast<int>(FIOperandNum) ==
1361  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1362  AMDGPU::OpName::vaddr));
1363 
1364  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1365  MFI->getStackPtrOffsetReg());
1366 
1367  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
1368 
1369  int64_t Offset = FrameInfo.getObjectOffset(Index);
1370  int64_t OldImm
1371  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1372  int64_t NewOffset = OldImm + Offset;
1373 
1374  if (isUInt<12>(NewOffset) &&
1375  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1376  MI->eraseFromParent();
1377  return;
1378  }
1379  }
1380 
1381  // If the offset is simply too big, don't convert to a scratch wave offset
1382  // relative index.
1383 
1384  int64_t Offset = FrameInfo.getObjectOffset(Index);
1385  FIOp.ChangeToImmediate(Offset);
1386  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1387  Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1388  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1389  .addImm(Offset);
1390  FIOp.ChangeToRegister(TmpReg, false, false, true);
1391  }
1392  }
1393  }
1394 }
1395 
 1396 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
 1397   return AMDGPUInstPrinter::getRegisterName(Reg);
 1398 }
1399 
1400 // FIXME: This is very slow. It might be worth creating a map from physreg to
1401 // register class.
 1402 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
 1403   assert(Register::isPhysicalRegister(Reg));
 1404 
1405  static const TargetRegisterClass *const BaseClasses[] = {
1406  &AMDGPU::VGPR_32RegClass,
1407  &AMDGPU::SReg_32RegClass,
1408  &AMDGPU::AGPR_32RegClass,
1409  &AMDGPU::VReg_64RegClass,
1410  &AMDGPU::SReg_64RegClass,
1411  &AMDGPU::AReg_64RegClass,
1412  &AMDGPU::VReg_96RegClass,
1413  &AMDGPU::SReg_96RegClass,
1414  &AMDGPU::VReg_128RegClass,
1415  &AMDGPU::SReg_128RegClass,
1416  &AMDGPU::AReg_128RegClass,
1417  &AMDGPU::VReg_160RegClass,
1418  &AMDGPU::SReg_160RegClass,
1419  &AMDGPU::VReg_256RegClass,
1420  &AMDGPU::SReg_256RegClass,
1421  &AMDGPU::VReg_512RegClass,
1422  &AMDGPU::SReg_512RegClass,
1423  &AMDGPU::AReg_512RegClass,
1424  &AMDGPU::SReg_1024RegClass,
1425  &AMDGPU::VReg_1024RegClass,
1426  &AMDGPU::AReg_1024RegClass,
1427  &AMDGPU::SCC_CLASSRegClass,
1428  &AMDGPU::Pseudo_SReg_32RegClass,
1429  &AMDGPU::Pseudo_SReg_128RegClass,
1430  };
1431 
1432  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1433  if (BaseClass->contains(Reg)) {
1434  return BaseClass;
1435  }
1436  }
1437  return nullptr;
1438 }
1439 
1440 // TODO: It might be helpful to have some target specific flags in
1441 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
 1442 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
 1443   unsigned Size = getRegSizeInBits(*RC);
1444  switch (Size) {
1445  case 32:
1446  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1447  case 64:
1448  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1449  case 96:
1450  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1451  case 128:
1452  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1453  case 160:
1454  return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1455  case 256:
1456  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1457  case 512:
1458  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1459  case 1024:
1460  return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
1461  case 1:
1462  return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr;
1463  default:
1464  assert(Size < 32 && "Invalid register class size");
1465  return false;
1466  }
1467 }
1468 
 1469 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
 1470   unsigned Size = getRegSizeInBits(*RC);
1471  if (Size < 32)
1472  return false;
1473  switch (Size) {
1474  case 32:
1475  return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
1476  case 64:
1477  return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
1478  case 96:
1479  return false;
1480  case 128:
1481  return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
1482  case 160:
1483  case 256:
1484  return false;
1485  case 512:
1486  return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
1487  case 1024:
1488  return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
1489  default:
1490  llvm_unreachable("Invalid register class size");
1491  }
1492 }
1493 
 1494 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
 1495   const TargetRegisterClass *SRC) const {
1496  switch (getRegSizeInBits(*SRC)) {
1497  case 32:
1498  return &AMDGPU::VGPR_32RegClass;
1499  case 64:
1500  return &AMDGPU::VReg_64RegClass;
1501  case 96:
1502  return &AMDGPU::VReg_96RegClass;
1503  case 128:
1504  return &AMDGPU::VReg_128RegClass;
1505  case 160:
1506  return &AMDGPU::VReg_160RegClass;
1507  case 256:
1508  return &AMDGPU::VReg_256RegClass;
1509  case 512:
1510  return &AMDGPU::VReg_512RegClass;
1511  case 1024:
1512  return &AMDGPU::VReg_1024RegClass;
1513  case 1:
1514  return &AMDGPU::VReg_1RegClass;
1515  default:
1516  llvm_unreachable("Invalid register class size");
1517  }
1518 }
1519 
 1520 const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
 1521   const TargetRegisterClass *SRC) const {
1522  switch (getRegSizeInBits(*SRC)) {
1523  case 32:
1524  return &AMDGPU::AGPR_32RegClass;
1525  case 64:
1526  return &AMDGPU::AReg_64RegClass;
1527  case 128:
1528  return &AMDGPU::AReg_128RegClass;
1529  case 512:
1530  return &AMDGPU::AReg_512RegClass;
1531  case 1024:
1532  return &AMDGPU::AReg_1024RegClass;
1533  default:
1534  llvm_unreachable("Invalid register class size");
1535  }
1536 }
1537 
 1538 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
 1539   const TargetRegisterClass *VRC) const {
1540  switch (getRegSizeInBits(*VRC)) {
1541  case 32:
1542  return &AMDGPU::SGPR_32RegClass;
1543  case 64:
1544  return &AMDGPU::SReg_64RegClass;
1545  case 96:
1546  return &AMDGPU::SReg_96RegClass;
1547  case 128:
1548  return &AMDGPU::SReg_128RegClass;
1549  case 160:
1550  return &AMDGPU::SReg_160RegClass;
1551  case 256:
1552  return &AMDGPU::SReg_256RegClass;
1553  case 512:
1554  return &AMDGPU::SReg_512RegClass;
1555  case 1024:
1556  return &AMDGPU::SReg_1024RegClass;
1557  default:
1558  llvm_unreachable("Invalid register class size");
1559  }
1560 }
1561 
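// Given a register class and a subregister index, return the register class
// that covers just that subregister, assuming each lane of the index
// corresponds to one 32-bit register.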
 1562 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
 1563   const TargetRegisterClass *RC, unsigned SubIdx) const {
1564  if (SubIdx == AMDGPU::NoSubRegister)
1565  return RC;
1566 
1567  // We can assume that each lane corresponds to one 32-bit register.
1568  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1569  if (isSGPRClass(RC)) {
1570  switch (Count) {
1571  case 1:
1572  return &AMDGPU::SGPR_32RegClass;
1573  case 2:
1574  return &AMDGPU::SReg_64RegClass;
1575  case 3:
1576  return &AMDGPU::SReg_96RegClass;
1577  case 4:
1578  return &AMDGPU::SReg_128RegClass;
1579  case 5:
1580  return &AMDGPU::SReg_160RegClass;
1581  case 8:
1582  return &AMDGPU::SReg_256RegClass;
1583  case 16:
1584  return &AMDGPU::SReg_512RegClass;
1585  case 32: /* fall-through */
1586  default:
1587  llvm_unreachable("Invalid sub-register class size");
1588  }
1589  } else if (hasAGPRs(RC)) {
1590  switch (Count) {
1591  case 1:
1592  return &AMDGPU::AGPR_32RegClass;
1593  case 2:
1594  return &AMDGPU::AReg_64RegClass;
1595  case 4:
1596  return &AMDGPU::AReg_128RegClass;
1597  case 16:
1598  return &AMDGPU::AReg_512RegClass;
1599  case 32: /* fall-through */
1600  default:
1601  llvm_unreachable("Invalid sub-register class size");
1602  }
1603  } else {
1604  switch (Count) {
1605  case 1:
1606  return &AMDGPU::VGPR_32RegClass;
1607  case 2:
1608  return &AMDGPU::VReg_64RegClass;
1609  case 3:
1610  return &AMDGPU::VReg_96RegClass;
1611  case 4:
1612  return &AMDGPU::VReg_128RegClass;
1613  case 5:
1614  return &AMDGPU::VReg_160RegClass;
1615  case 8:
1616  return &AMDGPU::VReg_256RegClass;
1617  case 16:
1618  return &AMDGPU::VReg_512RegClass;
1619  case 32: /* fall-through */
1620  default:
1621  llvm_unreachable("Invalid sub-register class size");
1622  }
1623  }
1624 }
1625 
1626 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
 1627   if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
 1628       OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
1629  return !ST.hasMFMAInlineLiteralBug();
1630 
1631  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1632  OpType <= AMDGPU::OPERAND_SRC_LAST;
1633 }
1634 
 1635 bool SIRegisterInfo::shouldRewriteCopySrc(
 1636   const TargetRegisterClass *DefRC,
1637  unsigned DefSubReg,
1638  const TargetRegisterClass *SrcRC,
1639  unsigned SrcSubReg) const {
1640  // We want to prefer the smallest register class possible, so we don't want to
1641  // stop and rewrite on anything that looks like a subregister
1642  // extract. Operations mostly don't care about the super register class, so we
1643  // only want to stop on the most basic of copies between the same register
1644  // class.
1645  //
1646  // e.g. if we have something like
1647  // %0 = ...
1648  // %1 = ...
1649  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1650  // %3 = COPY %2, sub0
1651  //
1652  // We want to look through the COPY to find:
1653  // => %3 = COPY %0
1654 
1655  // Plain copy.
1656  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1657 }
1658 
1659 /// Returns a register that is not used at any point in the function.
1660 /// If all registers are used, then this function will return
 1661 /// AMDGPU::NoRegister.
1662 unsigned
 1663 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
 1664                                    const TargetRegisterClass *RC,
1665  const MachineFunction &MF) const {
1666 
1667  for (unsigned Reg : *RC)
1668  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1669  return Reg;
1670  return AMDGPU::NoRegister;
1671 }
1672 
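// Return the subregister indices that split RC into EltSize-byte pieces
// (EltSize is 4, 8, 16 or 32); an empty list means RC is a single element of
// that size.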
 1673 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
 1674                                                    unsigned EltSize) const {
1675  if (EltSize == 4) {
1676  static const int16_t Sub0_31[] = {
1677  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1678  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1679  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1680  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1681  AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
1682  AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
1683  AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
1684  AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
1685  };
1686 
1687  static const int16_t Sub0_15[] = {
1688  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1689  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1690  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1691  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1692  };
1693 
1694  static const int16_t Sub0_7[] = {
1695  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1696  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1697  };
1698 
1699  static const int16_t Sub0_4[] = {
1700  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1701  };
1702 
1703  static const int16_t Sub0_3[] = {
1704  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1705  };
1706 
1707  static const int16_t Sub0_2[] = {
1708  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1709  };
1710 
1711  static const int16_t Sub0_1[] = {
1712  AMDGPU::sub0, AMDGPU::sub1,
1713  };
1714 
1715  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1716  case 32:
1717  return {};
1718  case 64:
1719  return makeArrayRef(Sub0_1);
1720  case 96:
1721  return makeArrayRef(Sub0_2);
1722  case 128:
1723  return makeArrayRef(Sub0_3);
1724  case 160:
1725  return makeArrayRef(Sub0_4);
1726  case 256:
1727  return makeArrayRef(Sub0_7);
1728  case 512:
1729  return makeArrayRef(Sub0_15);
1730  case 1024:
1731  return makeArrayRef(Sub0_31);
1732  default:
1733  llvm_unreachable("unhandled register size");
1734  }
1735  }
1736 
1737  if (EltSize == 8) {
1738  static const int16_t Sub0_31_64[] = {
1739  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1740  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1741  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1742  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1743  AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
1744  AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
1745  AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
1746  AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
1747  };
1748 
1749  static const int16_t Sub0_15_64[] = {
1750  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1751  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1752  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1753  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1754  };
1755 
1756  static const int16_t Sub0_7_64[] = {
1757  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1758  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1759  };
1760 
1761 
1762  static const int16_t Sub0_3_64[] = {
1763  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1764  };
1765 
1766  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1767  case 64:
1768  return {};
1769  case 128:
1770  return makeArrayRef(Sub0_3_64);
1771  case 256:
1772  return makeArrayRef(Sub0_7_64);
1773  case 512:
1774  return makeArrayRef(Sub0_15_64);
1775  case 1024:
1776  return makeArrayRef(Sub0_31_64);
1777  default:
1778  llvm_unreachable("unhandled register size");
1779  }
1780  }
1781 
1782  if (EltSize == 16) {
1783 
1784  static const int16_t Sub0_31_128[] = {
1785  AMDGPU::sub0_sub1_sub2_sub3,
1786  AMDGPU::sub4_sub5_sub6_sub7,
1787  AMDGPU::sub8_sub9_sub10_sub11,
1788  AMDGPU::sub12_sub13_sub14_sub15,
1789  AMDGPU::sub16_sub17_sub18_sub19,
1790  AMDGPU::sub20_sub21_sub22_sub23,
1791  AMDGPU::sub24_sub25_sub26_sub27,
1792  AMDGPU::sub28_sub29_sub30_sub31
1793  };
1794 
1795  static const int16_t Sub0_15_128[] = {
1796  AMDGPU::sub0_sub1_sub2_sub3,
1797  AMDGPU::sub4_sub5_sub6_sub7,
1798  AMDGPU::sub8_sub9_sub10_sub11,
1799  AMDGPU::sub12_sub13_sub14_sub15
1800  };
1801 
1802  static const int16_t Sub0_7_128[] = {
1803  AMDGPU::sub0_sub1_sub2_sub3,
1804  AMDGPU::sub4_sub5_sub6_sub7
1805  };
1806 
1807  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1808  case 128:
1809  return {};
1810  case 256:
1811  return makeArrayRef(Sub0_7_128);
1812  case 512:
1813  return makeArrayRef(Sub0_15_128);
1814  case 1024:
1815  return makeArrayRef(Sub0_31_128);
1816  default:
1817  llvm_unreachable("unhandled register size");
1818  }
1819  }
1820 
1821  assert(EltSize == 32 && "unhandled elt size");
1822 
1823  static const int16_t Sub0_31_256[] = {
1824  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1825  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1826  AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1827  AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1828  };
1829 
1830  static const int16_t Sub0_15_256[] = {
1831  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1832  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1833  };
1834 
1835  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1836  case 256:
1837  return {};
1838  case 512:
1839  return makeArrayRef(Sub0_15_256);
1840  case 1024:
1841  return makeArrayRef(Sub0_31_256);
1842  default:
1843  llvm_unreachable("unhandled register size");
1844  }
1845 }
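// For illustration: getRegSplitParts(&AMDGPU::VReg_128RegClass, 4) yields
// {sub0, sub1, sub2, sub3}, while EltSize == 8 on the same class yields
// {sub0_sub1, sub2_sub3}.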
1846 
1847 const TargetRegisterClass*
1848 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1849  unsigned Reg) const {
1850  if (Register::isVirtualRegister(Reg))
1851  return MRI.getRegClass(Reg);
1852 
1853  return getPhysRegClass(Reg);
1854 }
1855 
1856 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1857  unsigned Reg) const {
1858  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1859  assert(RC && "Register class for the reg not found");
1860  return hasVGPRs(RC);
1861 }
1862 
1863 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
1864  unsigned Reg) const {
1865  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1866  assert(RC && "Register class for the reg not found");
1867  return hasAGPRs(RC);
1868 }
1869 
1870 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1871  const TargetRegisterClass *SrcRC,
1872  unsigned SubReg,
1873  const TargetRegisterClass *DstRC,
1874  unsigned DstSubReg,
1875  const TargetRegisterClass *NewRC,
1876  LiveIntervals &LIS) const {
1877  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1878  unsigned DstSize = getRegSizeInBits(*DstRC);
1879  unsigned NewSize = getRegSizeInBits(*NewRC);
1880 
1881  // Do not increase size of registers beyond dword, we would need to allocate
1882  // adjacent registers and constraint regalloc more than needed.
1883 
1884  // Always allow dword coalescing.
1885  if (SrcSize <= 32 || DstSize <= 32)
1886  return true;
1887 
1888  return NewSize <= DstSize || NewSize <= SrcSize;
1889 }
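// In effect, dword-or-smaller copies always coalesce, and wider copies coalesce
// only while the merged class is no larger than the bigger of the two original
// classes, so coalescing never creates register tuples wider than the code
// already used.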
1890 
1891 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1892  MachineFunction &MF) const {
1893 
1894  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1895  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1896 
1897  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1898  MF.getFunction());
1899  switch (RC->getID()) {
1900  default:
1901  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1902  case AMDGPU::VGPR_32RegClassID:
1903  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1904  case AMDGPU::SGPR_32RegClassID:
1905  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1906  }
1907 }
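// The limit is the tighter of two bounds: the per-wave register budget implied
// by the occupancy achievable with this function's LDS usage, and the
// subtarget's per-function maximum for that register file.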
1908 
1909 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1910  unsigned Idx) const {
1911  if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
1912  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1913  const_cast<MachineFunction &>(MF));
1914 
1915  if (Idx == getSGPRPressureSet())
1916  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1917  const_cast<MachineFunction &>(MF));
1918 
1919  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1920 }
1921 
1922 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1923  static const int Empty[] = { -1 };
1924 
1925  if (hasRegUnit(AMDGPU::M0, RegUnit))
1926  return Empty;
1927  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1928 }
1929 
1930 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1931  // Not a callee saved register.
1932  return AMDGPU::SGPR30_SGPR31;
1933 }
1934 
1935 const TargetRegisterClass *
1936 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
1937  const RegisterBank &RB,
1938  const MachineRegisterInfo &MRI) const {
1939  switch (Size) {
1940  case 1: {
1941  switch (RB.getID()) {
1942  case AMDGPU::VGPRRegBankID:
1943  return &AMDGPU::VGPR_32RegClass;
1944  case AMDGPU::VCCRegBankID:
1945  return isWave32 ?
1946  &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
1947  case AMDGPU::SGPRRegBankID:
1948  return &AMDGPU::SReg_32_XM0RegClass;
1949  case AMDGPU::SCCRegBankID:
1950  // This needs to return an allocatable class, so don't bother returning
1951  // the dummy SCC class.
1952  return &AMDGPU::SReg_32_XM0RegClass;
1953  default:
1954  llvm_unreachable("unknown register bank");
1955  }
1956  }
1957  case 32:
1958  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1959  &AMDGPU::SReg_32_XM0RegClass;
1960  case 64:
1961  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1962  &AMDGPU::SReg_64_XEXECRegClass;
1963  case 96:
1964  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1965  &AMDGPU::SReg_96RegClass;
1966  case 128:
1967  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1968  &AMDGPU::SReg_128RegClass;
1969  case 160:
1970  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1971  &AMDGPU::SReg_160RegClass;
1972  case 256:
1973  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1974  &AMDGPU::SReg_256RegClass;
1975  case 512:
1976  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1977  &AMDGPU::SReg_512RegClass;
1978  default:
1979  if (Size < 32)
1980  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1981  &AMDGPU::SReg_32_XM0RegClass;
1982  return nullptr;
1983  }
1984 }
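// For illustration: a 1-bit value on the VCC bank maps to the wave-size
// condition-register class (SReg_32_XM0_XEXEC for wave32, SReg_64_XEXEC for
// wave64), while larger sizes simply pick the VGPR or SGPR class of matching
// width depending on the bank.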
1985 
1986 const TargetRegisterClass *
1987 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1988  const MachineRegisterInfo &MRI) const {
1989  if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()))
1990  return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
1991  return nullptr;
1992 }
1993 
1994 unsigned SIRegisterInfo::getVCC() const {
1995  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
1996 }
1997 
1998 const TargetRegisterClass *
1999 SIRegisterInfo::getRegClass(unsigned RCID) const {
2000  switch ((int)RCID) {
2001  case AMDGPU::SReg_1RegClassID:
2002  return getBoolRC();
2003  case AMDGPU::SReg_1_XEXECRegClassID:
2004  return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2005  : &AMDGPU::SReg_64_XEXECRegClass;
2006  case -1:
2007  return nullptr;
2008  default:
2009  return AMDGPURegisterInfo::getRegClass(RCID);
2010  }
2011 }
2012 
2013 // Find reaching register definition
2014 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
2015  MachineInstr &Use,
2016  MachineRegisterInfo &MRI,
2017  LiveIntervals *LIS) const {
2018  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
2019  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
2020  SlotIndex DefIdx;
2021 
2022  if (Register::isVirtualRegister(Reg)) {
2023  if (!LIS->hasInterval(Reg))
2024  return nullptr;
2025  LiveInterval &LI = LIS->getInterval(Reg);
2026  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
2027  : MRI.getMaxLaneMaskForVReg(Reg);
2028  VNInfo *V = nullptr;
2029  if (LI.hasSubRanges()) {
2030  for (auto &S : LI.subranges()) {
2031  if ((S.LaneMask & SubLanes) == SubLanes) {
2032  V = S.getVNInfoAt(UseIdx);
2033  break;
2034  }
2035  }
2036  } else {
2037  V = LI.getVNInfoAt(UseIdx);
2038  }
2039  if (!V)
2040  return nullptr;
2041  DefIdx = V->def;
2042  } else {
2043  // Find last def.
2044  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
2045  LiveRange &LR = LIS->getRegUnit(*Units);
2046  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
2047  if (!DefIdx.isValid() ||
2048  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
2049  LIS->getInstructionFromIndex(V->def)))
2050  DefIdx = V->def;
2051  } else {
2052  return nullptr;
2053  }
2054  }
2055  }
2056 
2057  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2058 
2059  if (!Def || !MDT.dominates(Def, &Use))
2060  return nullptr;
2061 
2062  assert(Def->modifiesRegister(Reg, this));
2063 
2064  return Def;
2065 }
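// In summary: for a virtual register the reaching value is read from its live
// interval (using the subrange covering the requested lanes when subregister
// liveness is available); for a physical register, roughly, the latest
// dominating definition across all of its register units is taken. In both
// cases the candidate definition is returned only if it dominates the use.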