SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/LLVMContext.h"
28 
29 using namespace llvm;
30 
31 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
32  for (unsigned i = 0; PSets[i] != -1; ++i) {
33  if (PSets[i] == (int)PSetID)
34  return true;
35  }
36  return false;
37 }
38 
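// Set the bit for PSetID in PressureSets if any register unit of Reg belongs to that pressure set.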
39 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
40  BitVector &PressureSets) const {
41  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
42  const int *PSets = getRegUnitPressureSets(*U);
43  if (hasPressureSet(PSets, PSetID)) {
44  PressureSets.set(PSetID);
45  break;
46  }
47  }
48 }
49 
50 static cl::opt<bool> EnableSpillSGPRToSMEM(
51  "amdgpu-spill-sgpr-to-smem",
52  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
53  cl::init(false));
54 
55 static cl::opt<bool> EnableSpillSGPRToVGPR(
56  "amdgpu-spill-sgpr-to-vgpr",
57  cl::desc("Enable spilling VGPRs to SGPRs"),
58  cl::ReallyHidden,
59  cl::init(true));
60 
61 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
62  AMDGPURegisterInfo(),
63  SGPRPressureSets(getNumRegPressureSets()),
64  VGPRPressureSets(getNumRegPressureSets()),
65  SpillSGPRToVGPR(false),
66  SpillSGPRToSMEM(false) {
67  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
68  SpillSGPRToSMEM = true;
69  else if (EnableSpillSGPRToVGPR)
70  SpillSGPRToVGPR = true;
71 
72  unsigned NumRegPressureSets = getNumRegPressureSets();
73 
74  SGPRSetID = NumRegPressureSets;
75  VGPRSetID = NumRegPressureSets;
76 
77  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
78  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
79  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
80  }
81 
82  // Determine the number of reg units for each pressure set.
83  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
84  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
85  const int *PSets = getRegUnitPressureSets(i);
86  for (unsigned j = 0; PSets[j] != -1; ++j) {
87  ++PressureSetRegUnits[PSets[j]];
88  }
89  }
90 
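 // Treat the pressure set covering the most register units as the canonical SGPR/VGPR pressure set.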
91  unsigned VGPRMax = 0, SGPRMax = 0;
92  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
93  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
94  VGPRSetID = i;
95  VGPRMax = PressureSetRegUnits[i];
96  continue;
97  }
98  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
99  SGPRSetID = i;
100  SGPRMax = PressureSetRegUnits[i];
101  }
102  }
103 
104  assert(SGPRSetID < NumRegPressureSets &&
105  VGPRSetID < NumRegPressureSets);
106 }
107 
108 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
109  const MachineFunction &MF) const {
110 
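 // The scratch buffer resource descriptor is an SReg_128, so reserve the last 4-aligned group of four SGPRs.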
111  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
112  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
113  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
114  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
115 }
116 
117 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
118  unsigned Reg;
119 
120  // Try to place it in a hole after PrivateSegmentBufferReg.
121  if (RegCount & 3) {
122  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
123  // alignment constraints, so we have a hole where we can put the wave offset.
124  Reg = RegCount - 1;
125  } else {
126  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
127  // wave offset before it.
128  Reg = RegCount - 5;
129  }
130 
131  return Reg;
132 }
133 
134 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
135  const MachineFunction &MF) const {
136  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
137  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
138  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
139 }
140 
141 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
142  const MachineFunction &MF) const {
143  return AMDGPU::SGPR32;
144 }
145 
146 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
147  BitVector Reserved(getNumRegs());
148 
149  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
150  // this seems likely to result in bugs, so I'm marking them as reserved.
151  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
152  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
153 
154  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
155  reserveRegisterTuples(Reserved, AMDGPU::M0);
156 
157  // Reserve the memory aperture registers.
158  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
160  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
161  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
162 
163  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
164  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
165 
166  // Reserve xnack_mask registers - support is not implemented in Codegen.
167  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
168 
169  // Reserve lds_direct register - support is not implemented in Codegen.
170  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
171 
172  // Reserve Trap Handler registers - support is not implemented in Codegen.
173  reserveRegisterTuples(Reserved, AMDGPU::TBA);
174  reserveRegisterTuples(Reserved, AMDGPU::TMA);
175  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
176  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
177  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
178  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
179  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
180  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
181  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
182  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
183 
184  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
185 
186  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
187  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
188  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
189  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
190  reserveRegisterTuples(Reserved, Reg);
191  }
192 
193  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
194  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
195  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
196  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
197  reserveRegisterTuples(Reserved, Reg);
198  }
199 
200  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
201 
202  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
203  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
204  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
205  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
206  }
207 
208  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
209  if (ScratchRSrcReg != AMDGPU::NoRegister) {
210  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
211  // to spill.
212  // TODO: May need to reserve a VGPR if doing LDS spilling.
213  reserveRegisterTuples(Reserved, ScratchRSrcReg);
214  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
215  }
216 
217  // We have to assume the SP is needed in case there are calls in the function,
218  // which is detected after the function is lowered. If we aren't really going
219  // to need SP, don't bother reserving it.
220  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
221 
222  if (StackPtrReg != AMDGPU::NoRegister) {
223  reserveRegisterTuples(Reserved, StackPtrReg);
224  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
225  }
226 
227  unsigned FrameReg = MFI->getFrameOffsetReg();
228  if (FrameReg != AMDGPU::NoRegister) {
229  reserveRegisterTuples(Reserved, FrameReg);
230  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
231  }
232 
233  return Reserved;
234 }
235 
236 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
237  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
238  if (Info->isEntryFunction()) {
239  const MachineFrameInfo &MFI = Fn.getFrameInfo();
240  return MFI.hasStackObjects() || MFI.hasCalls();
241  }
242 
243  // May need scavenger for dealing with callee saved registers.
244  return true;
245 }
246 
247 bool SIRegisterInfo::requiresFrameIndexScavenging(
248  const MachineFunction &MF) const {
249  const MachineFrameInfo &MFI = MF.getFrameInfo();
250  if (MFI.hasStackObjects())
251  return true;
252 
253  // May need to deal with callee saved registers.
254  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
255  return !Info->isEntryFunction();
256 }
257 
258 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
259  const MachineFunction &MF) const {
260  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
261  // create a virtual register for it during frame index elimination, so the
262  // scavenger is directly needed.
263  return MF.getFrameInfo().hasStackObjects() &&
264  MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
265  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
266 }
267 
268 bool SIRegisterInfo::requiresVirtualBaseRegisters(
269  const MachineFunction &) const {
270  // There are no special dedicated stack or frame pointers.
271  return true;
272 }
273 
274 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
275  // This helps catch bugs as verifier errors.
276  return true;
277 }
278 
279 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
280  assert(SIInstrInfo::isMUBUF(*MI));
281 
282  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
283  AMDGPU::OpName::offset);
284  return MI->getOperand(OffIdx).getImm();
285 }
286 
287 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
288  int Idx) const {
289  if (!SIInstrInfo::isMUBUF(*MI))
290  return 0;
291 
292  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
293  AMDGPU::OpName::vaddr) &&
294  "Should never see frame index on non-address operand");
295 
296  return getMUBUFInstrOffset(MI);
297 }
298 
299 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
300  if (!MI->mayLoadOrStore())
301  return false;
302 
303  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
304 
305  return !isUInt<12>(FullOffset);
306 }
307 
308 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
309  unsigned BaseReg,
310  int FrameIdx,
311  int64_t Offset) const {
312  MachineBasicBlock::iterator Ins = MBB->begin();
313  DebugLoc DL; // Defaults to "unknown"
314 
315  if (Ins != MBB->end())
316  DL = Ins->getDebugLoc();
317 
318  MachineFunction *MF = MBB->getParent();
319  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
320  const SIInstrInfo *TII = Subtarget.getInstrInfo();
321 
322  if (Offset == 0) {
323  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
324  .addFrameIndex(FrameIdx);
325  return;
326  }
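 // Materialize the offset in an SGPR and the frame index in a VGPR, then combine them with a carry-less VALU add.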
327 
328  MachineRegisterInfo &MRI = MF->getRegInfo();
329  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
330 
331  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
332 
333  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
334  .addImm(Offset);
335  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
336  .addFrameIndex(FrameIdx);
337 
338  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
339  .addReg(OffsetReg, RegState::Kill)
340  .addReg(FIReg)
341  .addImm(0); // clamp bit
342 }
343 
344 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
345  int64_t Offset) const {
346 
347  MachineBasicBlock *MBB = MI.getParent();
348  MachineFunction *MF = MBB->getParent();
349  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
350  const SIInstrInfo *TII = Subtarget.getInstrInfo();
351 
352 #ifndef NDEBUG
353  // FIXME: Is it possible to be storing a frame index to itself?
354  bool SeenFI = false;
355  for (const MachineOperand &MO: MI.operands()) {
356  if (MO.isFI()) {
357  if (SeenFI)
358  llvm_unreachable("should not see multiple frame indices");
359 
360  SeenFI = true;
361  }
362  }
363 #endif
364 
365  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
366  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
367  assert(TII->isMUBUF(MI));
368  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
369  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
370  "should only be seeing frame offset relative FrameIndex");
371 
372 
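 // Fold the new base register and the adjusted immediate directly into the MUBUF address operands.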
373  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
374  int64_t NewOffset = OffsetOp->getImm() + Offset;
375  assert(isUInt<12>(NewOffset) && "offset should be legal");
376 
377  FIOp->ChangeToRegister(BaseReg, false);
378  OffsetOp->setImm(NewOffset);
379 }
380 
381 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
382  unsigned BaseReg,
383  int64_t Offset) const {
384  if (!SIInstrInfo::isMUBUF(*MI))
385  return false;
386 
387  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
388 
389  return isUInt<12>(NewOffset);
390 }
391 
392 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
393  const MachineFunction &MF, unsigned Kind) const {
394  // This is inaccurate. It depends on the instruction and address space. The
395  // only place where we should hit this is for dealing with frame indexes /
396  // private accesses, so this is correct in that case.
397  return &AMDGPU::VGPR_32RegClass;
398 }
399 
400 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
401 
402  switch (Op) {
403  case AMDGPU::SI_SPILL_S512_SAVE:
404  case AMDGPU::SI_SPILL_S512_RESTORE:
405  case AMDGPU::SI_SPILL_V512_SAVE:
406  case AMDGPU::SI_SPILL_V512_RESTORE:
407  return 16;
408  case AMDGPU::SI_SPILL_S256_SAVE:
409  case AMDGPU::SI_SPILL_S256_RESTORE:
410  case AMDGPU::SI_SPILL_V256_SAVE:
411  case AMDGPU::SI_SPILL_V256_RESTORE:
412  return 8;
413  case AMDGPU::SI_SPILL_S128_SAVE:
414  case AMDGPU::SI_SPILL_S128_RESTORE:
415  case AMDGPU::SI_SPILL_V128_SAVE:
416  case AMDGPU::SI_SPILL_V128_RESTORE:
417  return 4;
418  case AMDGPU::SI_SPILL_S96_SAVE:
419  case AMDGPU::SI_SPILL_S96_RESTORE:
420  case AMDGPU::SI_SPILL_V96_SAVE:
421  case AMDGPU::SI_SPILL_V96_RESTORE:
422  return 3;
423  case AMDGPU::SI_SPILL_S64_SAVE:
424  case AMDGPU::SI_SPILL_S64_RESTORE:
425  case AMDGPU::SI_SPILL_V64_SAVE:
426  case AMDGPU::SI_SPILL_V64_RESTORE:
427  return 2;
428  case AMDGPU::SI_SPILL_S32_SAVE:
429  case AMDGPU::SI_SPILL_S32_RESTORE:
430  case AMDGPU::SI_SPILL_V32_SAVE:
431  case AMDGPU::SI_SPILL_V32_RESTORE:
432  return 1;
433  default: llvm_unreachable("Invalid spill opcode");
434  }
435 }
436 
437 static int getOffsetMUBUFStore(unsigned Opc) {
438  switch (Opc) {
439  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
440  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
441  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
442  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
443  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
444  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
445  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
446  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
447  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
448  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
449  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
450  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
451  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
452  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
453  default:
454  return -1;
455  }
456 }
457 
458 static int getOffsetMUBUFLoad(unsigned Opc) {
459  switch (Opc) {
460  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
461  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
462  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
463  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
464  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
465  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
466  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
467  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
468  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
469  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
470  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
471  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
472  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
473  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
474  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
475  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
476  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
477  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
478  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
479  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
480  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
481  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
482  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
483  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
484  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
485  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
486  default:
487  return -1;
488  }
489 }
490 
491 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
492 // need to handle the case where an SGPR may need to be spilled while spilling.
493 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
494  MachineFrameInfo &MFI,
495  MachineBasicBlock::iterator MI,
496  int Index,
497  int64_t Offset) {
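 // Rewrite the OFFEN form (VGPR address) into the corresponding OFFSET form with an immediate offset, reusing the original rsrc and soffset operands.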
498  MachineBasicBlock *MBB = MI->getParent();
499  const DebugLoc &DL = MI->getDebugLoc();
500  bool IsStore = MI->mayStore();
501 
502  unsigned Opc = MI->getOpcode();
503  int LoadStoreOp = IsStore ?
504  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
505  if (LoadStoreOp == -1)
506  return false;
507 
508  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
509  MachineInstrBuilder NewMI =
510  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
511  .add(*Reg)
512  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
513  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
514  .addImm(Offset)
515  .addImm(0) // glc
516  .addImm(0) // slc
517  .addImm(0) // tfe
518  .cloneMemRefs(*MI);
519 
520  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
521  AMDGPU::OpName::vdata_in);
522  if (VDataIn)
523  NewMI.add(*VDataIn);
524  return true;
525 }
526 
527 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
528  unsigned LoadStoreOp,
529  int Index,
530  unsigned ValueReg,
531  bool IsKill,
532  unsigned ScratchRsrcReg,
533  unsigned ScratchOffsetReg,
534  int64_t InstOffset,
535  MachineMemOperand *MMO,
536  RegScavenger *RS) const {
537  MachineBasicBlock *MBB = MI->getParent();
538  MachineFunction *MF = MI->getParent()->getParent();
539  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
540  const SIInstrInfo *TII = ST.getInstrInfo();
541  const MachineFrameInfo &MFI = MF->getFrameInfo();
542 
543  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
544  const DebugLoc &DL = MI->getDebugLoc();
545  bool IsStore = Desc.mayStore();
546 
547  bool Scavenged = false;
548  unsigned SOffset = ScratchOffsetReg;
549 
550  const unsigned EltSize = 4;
551  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
552  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
553  unsigned Size = NumSubRegs * EltSize;
554  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
555  int64_t ScratchOffsetRegDelta = 0;
556 
557  unsigned Align = MFI.getObjectAlignment(Index);
558  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
559 
560  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
561 
562  if (!isUInt<12>(Offset + Size - EltSize)) {
563  SOffset = AMDGPU::NoRegister;
564 
565  // We currently only support spilling VGPRs to EltSize boundaries, meaning
566  // we can simplify the adjustment of Offset here to just scale with
567  // WavefrontSize.
568  Offset *= ST.getWavefrontSize();
569 
570  // We don't have access to the register scavenger if this function is called
571  // during PEI::scavengeFrameVirtualRegs().
572  if (RS)
573  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0, false);
574 
575  if (SOffset == AMDGPU::NoRegister) {
576  // There are no free SGPRs, and we are in the process of spilling
577  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
578  // on SI/CI, and on VI it is true until we implement spilling using scalar
579  // stores), we have no way to free up an SGPR. Our solution here is to
580  // add the offset directly to the ScratchOffset register, and then
581  // subtract the offset after the spill to return ScratchOffset to its
582  // original value.
583  SOffset = ScratchOffsetReg;
584  ScratchOffsetRegDelta = Offset;
585  } else {
586  Scavenged = true;
587  }
588 
589  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
590  .addReg(ScratchOffsetReg)
591  .addImm(Offset);
592 
593  Offset = 0;
594  }
595 
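 // Emit one buffer load/store per 32-bit sub-register; the immediate offset advances by EltSize each iteration and the final element carries the kill flags.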
596  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
597  unsigned SubReg = NumSubRegs == 1 ?
598  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
599 
600  unsigned SOffsetRegState = 0;
601  unsigned SrcDstRegState = getDefRegState(!IsStore);
602  if (i + 1 == e) {
603  SOffsetRegState |= getKillRegState(Scavenged);
604  // The last implicit use carries the "Kill" flag.
605  SrcDstRegState |= getKillRegState(IsKill);
606  }
607 
608  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
609  MachineMemOperand *NewMMO
610  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
611  EltSize, MinAlign(Align, EltSize * i));
612 
613  auto MIB = BuildMI(*MBB, MI, DL, Desc)
614  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
615  .addReg(ScratchRsrcReg)
616  .addReg(SOffset, SOffsetRegState)
617  .addImm(Offset)
618  .addImm(0) // glc
619  .addImm(0) // slc
620  .addImm(0) // tfe
621  .addMemOperand(NewMMO);
622 
623  if (NumSubRegs > 1)
624  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
625  }
626 
627  if (ScratchOffsetRegDelta != 0) {
628  // Subtract the offset we added to the ScratchOffset register.
629  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
630  .addReg(ScratchOffsetReg)
631  .addImm(ScratchOffsetRegDelta);
632  }
633 }
634 
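// Pick the widest scalar buffer load/store (16, 8, or 4 bytes) whose size evenly divides the register being spilled.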
635 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
636  bool Store) {
637  if (SuperRegSize % 16 == 0) {
638  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
639  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
640  }
641 
642  if (SuperRegSize % 8 == 0) {
643  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
644  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
645  }
646 
647  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
648  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
649 }
650 
651 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
652  int Index,
653  RegScavenger *RS,
654  bool OnlyToVGPR) const {
655  MachineBasicBlock *MBB = MI->getParent();
656  MachineFunction *MF = MBB->getParent();
657  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
658  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
659 
660  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
661  = MFI->getSGPRToVGPRSpills(Index);
662  bool SpillToVGPR = !VGPRSpills.empty();
663  if (OnlyToVGPR && !SpillToVGPR)
664  return false;
665 
666  MachineRegisterInfo &MRI = MF->getRegInfo();
667  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
668  const SIInstrInfo *TII = ST.getInstrInfo();
669 
670  unsigned SuperReg = MI->getOperand(0).getReg();
671  bool IsKill = MI->getOperand(0).isKill();
672  const DebugLoc &DL = MI->getDebugLoc();
673 
674  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
675 
676  bool SpillToSMEM = spillSGPRToSMEM();
677  if (SpillToSMEM && OnlyToVGPR)
678  return false;
679 
680  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
681  SuperReg != MFI->getFrameOffsetReg() &&
682  SuperReg != MFI->getScratchWaveOffsetReg()));
683 
684  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
685 
686  unsigned OffsetReg = AMDGPU::M0;
687  unsigned M0CopyReg = AMDGPU::NoRegister;
688 
689  if (SpillToSMEM) {
690  if (RS->isRegUsed(AMDGPU::M0)) {
691  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
692  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
693  .addReg(AMDGPU::M0);
694  }
695  }
696 
697  unsigned ScalarStoreOp;
698  unsigned EltSize = 4;
699  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
700  if (SpillToSMEM && isSGPRClass(RC)) {
701  // XXX - if private_element_size is larger than 4 it might be useful to be
702  // able to spill wider vmem spills.
703  std::tie(EltSize, ScalarStoreOp) =
704  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
705  }
706 
707  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
708  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
709 
710  // SubReg carries the "Kill" flag when SubReg == SuperReg.
711  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
712  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
713  unsigned SubReg = NumSubRegs == 1 ?
714  SuperReg : getSubReg(SuperReg, SplitParts[i]);
715 
716  if (SpillToSMEM) {
717  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
718 
719  // The allocated memory size is really the wavefront size * the frame
720  // index size. The widest register class is 64 bytes, so a 4-byte scratch
721  // allocation is enough to spill this in a single stack object.
722  //
723  // FIXME: Frame size/offsets are computed earlier than this, so the extra
724  // space is still unnecessarily allocated.
725 
726  unsigned Align = FrameInfo.getObjectAlignment(Index);
727  MachinePointerInfo PtrInfo
728  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
729  MachineMemOperand *MMO
730  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
731  EltSize, MinAlign(Align, EltSize * i));
732 
733  // SMEM instructions only support a single offset, so increment the wave
734  // offset.
735 
736  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
737  if (Offset != 0) {
738  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
739  .addReg(MFI->getFrameOffsetReg())
740  .addImm(Offset);
741  } else {
742  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
743  .addReg(MFI->getFrameOffsetReg());
744  }
745 
746  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
747  .addReg(SubReg, getKillRegState(IsKill)) // sdata
748  .addReg(MFI->getScratchRSrcReg()) // sbase
749  .addReg(OffsetReg, RegState::Kill) // soff
750  .addImm(0) // glc
751  .addMemOperand(MMO);
752 
753  continue;
754  }
755 
756  if (SpillToVGPR) {
757  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
758 
759  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
760  // only circumstance in which we say it is undefined is when it is the
761  // first spill to this VGPR in the first basic block.
762  bool VGPRDefined = true;
763  if (MBB == &MF->front())
764  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
765 
766  // Mark the "old value of vgpr" input undef only if this is the first sgpr
767  // spill to this specific vgpr in the first basic block.
768  BuildMI(*MBB, MI, DL,
769  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
770  Spill.VGPR)
771  .addReg(SubReg, getKillRegState(IsKill))
772  .addImm(Spill.Lane)
773  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
774 
775  // FIXME: Since this spills to another register instead of an actual
776  // frame index, we should delete the frame index when all references to
777  // it are fixed.
778  } else {
779  // XXX - Can the spill to VGPR fail for some subregisters but not others?
780  if (OnlyToVGPR)
781  return false;
782 
783  // Spill SGPR to a frame index.
784  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
785  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
786  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
787 
788  MachineInstrBuilder Mov
789  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
790  .addReg(SubReg, SubKillState);
791 
792 
793  // There could be undef components of a spilled super register.
794  // TODO: Can we detect this and skip the spill?
795  if (NumSubRegs > 1) {
796  // The last implicit use of the SuperReg carries the "Kill" flag.
797  unsigned SuperKillState = 0;
798  if (i + 1 == e)
799  SuperKillState |= getKillRegState(IsKill);
800  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
801  }
802 
803  unsigned Align = FrameInfo.getObjectAlignment(Index);
804  MachinePointerInfo PtrInfo
805  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
806  MachineMemOperand *MMO
807  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
808  EltSize, MinAlign(Align, EltSize * i));
809  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
810  .addReg(TmpReg, RegState::Kill) // src
811  .addFrameIndex(Index) // vaddr
812  .addReg(MFI->getScratchRSrcReg()) // srsrc
813  .addReg(MFI->getFrameOffsetReg()) // soffset
814  .addImm(i * 4) // offset
815  .addMemOperand(MMO);
816  }
817  }
818 
819  if (M0CopyReg != AMDGPU::NoRegister) {
820  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
821  .addReg(M0CopyReg, RegState::Kill);
822  }
823 
824  MI->eraseFromParent();
825  MFI->addToSpilledSGPRs(NumSubRegs);
826  return true;
827 }
828 
829 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
830  int Index,
831  RegScavenger *RS,
832  bool OnlyToVGPR) const {
833  MachineFunction *MF = MI->getParent()->getParent();
834  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
835  MachineBasicBlock *MBB = MI->getParent();
836  MachineRegisterInfo &MRI = MF->getRegInfo();
837 
838  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
839  = MFI->getSGPRToVGPRSpills(Index);
840  bool SpillToVGPR = !VGPRSpills.empty();
841  if (OnlyToVGPR && !SpillToVGPR)
842  return false;
843 
844  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
845  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
846  const SIInstrInfo *TII = ST.getInstrInfo();
847  const DebugLoc &DL = MI->getDebugLoc();
848 
849  unsigned SuperReg = MI->getOperand(0).getReg();
850  bool SpillToSMEM = spillSGPRToSMEM();
851  if (SpillToSMEM && OnlyToVGPR)
852  return false;
853 
854  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
855 
856  unsigned OffsetReg = AMDGPU::M0;
857  unsigned M0CopyReg = AMDGPU::NoRegister;
858 
859  if (SpillToSMEM) {
860  if (RS->isRegUsed(AMDGPU::M0)) {
861  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
862  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
863  .addReg(AMDGPU::M0);
864  }
865  }
866 
867  unsigned EltSize = 4;
868  unsigned ScalarLoadOp;
869 
870  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
871  if (SpillToSMEM && isSGPRClass(RC)) {
872  // XXX - if private_element_size is larger than 4 it might be useful to be
873  // able to spill wider vmem spills.
874  std::tie(EltSize, ScalarLoadOp) =
875  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
876  }
877 
878  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
879  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
880 
881  // SubReg carries the "Kill" flag when SubReg == SuperReg.
882  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
883 
884  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
885  unsigned SubReg = NumSubRegs == 1 ?
886  SuperReg : getSubReg(SuperReg, SplitParts[i]);
887 
888  if (SpillToSMEM) {
889  // FIXME: Size may be > 4 but extra bytes wasted.
890  unsigned Align = FrameInfo.getObjectAlignment(Index);
891  MachinePointerInfo PtrInfo
892  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
893  MachineMemOperand *MMO
894  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
895  EltSize, MinAlign(Align, EltSize * i));
896 
897  // Add i * 4 offset
898  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
899  if (Offset != 0) {
900  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
901  .addReg(MFI->getFrameOffsetReg())
902  .addImm(Offset);
903  } else {
904  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
905  .addReg(MFI->getFrameOffsetReg());
906  }
907 
908  auto MIB =
909  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
910  .addReg(MFI->getScratchRSrcReg()) // sbase
911  .addReg(OffsetReg, RegState::Kill) // soff
912  .addImm(0) // glc
913  .addMemOperand(MMO);
914 
915  if (NumSubRegs > 1 && i == 0)
916  MIB.addReg(SuperReg, RegState::ImplicitDefine);
917 
918  continue;
919  }
920 
921  if (SpillToVGPR) {
922  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
923  auto MIB =
924  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
925  SubReg)
926  .addReg(Spill.VGPR)
927  .addImm(Spill.Lane);
928 
929  if (NumSubRegs > 1 && i == 0)
930  MIB.addReg(SuperReg, RegState::ImplicitDefine);
931  } else {
932  if (OnlyToVGPR)
933  return false;
934 
935  // Restore SGPR from a stack slot.
936  // FIXME: We should use S_LOAD_DWORD here for VI.
937  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
938  unsigned Align = FrameInfo.getObjectAlignment(Index);
939 
940  MachinePointerInfo PtrInfo
941  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
942 
943  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
944  MachineMemOperand::MOLoad, EltSize,
945  MinAlign(Align, EltSize * i));
946 
947  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
948  .addFrameIndex(Index) // vaddr
949  .addReg(MFI->getScratchRSrcReg()) // srsrc
950  .addReg(MFI->getFrameOffsetReg()) // soffset
951  .addImm(i * 4) // offset
952  .addMemOperand(MMO);
953 
954  auto MIB =
955  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
956  .addReg(TmpReg, RegState::Kill);
957 
958  if (NumSubRegs > 1)
959  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
960  }
961  }
962 
963  if (M0CopyReg != AMDGPU::NoRegister) {
964  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
965  .addReg(M0CopyReg, RegState::Kill);
966  }
967 
968  MI->eraseFromParent();
969  return true;
970 }
971 
972 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
973 /// a VGPR and the stack slot can be safely eliminated when all other users are
974 /// handled.
975 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
976  MachineBasicBlock::iterator MI,
977  int FI,
978  RegScavenger *RS) const {
979  switch (MI->getOpcode()) {
980  case AMDGPU::SI_SPILL_S512_SAVE:
981  case AMDGPU::SI_SPILL_S256_SAVE:
982  case AMDGPU::SI_SPILL_S128_SAVE:
983  case AMDGPU::SI_SPILL_S96_SAVE:
984  case AMDGPU::SI_SPILL_S64_SAVE:
985  case AMDGPU::SI_SPILL_S32_SAVE:
986  return spillSGPR(MI, FI, RS, true);
987  case AMDGPU::SI_SPILL_S512_RESTORE:
988  case AMDGPU::SI_SPILL_S256_RESTORE:
989  case AMDGPU::SI_SPILL_S128_RESTORE:
990  case AMDGPU::SI_SPILL_S96_RESTORE:
991  case AMDGPU::SI_SPILL_S64_RESTORE:
992  case AMDGPU::SI_SPILL_S32_RESTORE:
993  return restoreSGPR(MI, FI, RS, true);
994  default:
995  llvm_unreachable("not an SGPR spill instruction");
996  }
997 }
998 
999 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1000  int SPAdj, unsigned FIOperandNum,
1001  RegScavenger *RS) const {
1002  MachineFunction *MF = MI->getParent()->getParent();
1003  MachineRegisterInfo &MRI = MF->getRegInfo();
1004  MachineBasicBlock *MBB = MI->getParent();
1005  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1006  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1007  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1008  const SIInstrInfo *TII = ST.getInstrInfo();
1009  DebugLoc DL = MI->getDebugLoc();
1010 
1011  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1012  int Index = MI->getOperand(FIOperandNum).getIndex();
1013 
1014  switch (MI->getOpcode()) {
1015  // SGPR register spill
1016  case AMDGPU::SI_SPILL_S512_SAVE:
1017  case AMDGPU::SI_SPILL_S256_SAVE:
1018  case AMDGPU::SI_SPILL_S128_SAVE:
1019  case AMDGPU::SI_SPILL_S96_SAVE:
1020  case AMDGPU::SI_SPILL_S64_SAVE:
1021  case AMDGPU::SI_SPILL_S32_SAVE: {
1022  spillSGPR(MI, Index, RS);
1023  break;
1024  }
1025 
1026  // SGPR register restore
1027  case AMDGPU::SI_SPILL_S512_RESTORE:
1028  case AMDGPU::SI_SPILL_S256_RESTORE:
1029  case AMDGPU::SI_SPILL_S128_RESTORE:
1030  case AMDGPU::SI_SPILL_S96_RESTORE:
1031  case AMDGPU::SI_SPILL_S64_RESTORE:
1032  case AMDGPU::SI_SPILL_S32_RESTORE: {
1033  restoreSGPR(MI, Index, RS);
1034  break;
1035  }
1036 
1037  // VGPR register spill
1038  case AMDGPU::SI_SPILL_V512_SAVE:
1039  case AMDGPU::SI_SPILL_V256_SAVE:
1040  case AMDGPU::SI_SPILL_V128_SAVE:
1041  case AMDGPU::SI_SPILL_V96_SAVE:
1042  case AMDGPU::SI_SPILL_V64_SAVE:
1043  case AMDGPU::SI_SPILL_V32_SAVE: {
1044  const MachineOperand *VData = TII->getNamedOperand(*MI,
1045  AMDGPU::OpName::vdata);
1046  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1047  Index,
1048  VData->getReg(), VData->isKill(),
1049  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1050  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1051  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1052  *MI->memoperands_begin(),
1053  RS);
1054  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1055  MI->eraseFromParent();
1056  break;
1057  }
1058  case AMDGPU::SI_SPILL_V32_RESTORE:
1059  case AMDGPU::SI_SPILL_V64_RESTORE:
1060  case AMDGPU::SI_SPILL_V96_RESTORE:
1061  case AMDGPU::SI_SPILL_V128_RESTORE:
1062  case AMDGPU::SI_SPILL_V256_RESTORE:
1063  case AMDGPU::SI_SPILL_V512_RESTORE: {
1064  const MachineOperand *VData = TII->getNamedOperand(*MI,
1065  AMDGPU::OpName::vdata);
1066 
1067  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1068  Index,
1069  VData->getReg(), VData->isKill(),
1070  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1071  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1072  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1073  *MI->memoperands_begin(),
1074  RS);
1075  MI->eraseFromParent();
1076  break;
1077  }
1078 
1079  default: {
1080  const DebugLoc &DL = MI->getDebugLoc();
1081  bool IsMUBUF = TII->isMUBUF(*MI);
1082 
1083  if (!IsMUBUF &&
1084  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1085  // Convert to an absolute stack address by finding the offset from the
1086  // scratch wave base and scaling by the wave size.
1087  //
1088  // In an entry function/kernel the stack address is already the
1089  // absolute address relative to the scratch wave offset.
1090 
1091  unsigned DiffReg
1092  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1093 
1094  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1095  unsigned ResultReg = IsCopy ?
1096  MI->getOperand(0).getReg() :
1097  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1098 
1099  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1100  .addReg(MFI->getFrameOffsetReg())
1101  .addReg(MFI->getScratchWaveOffsetReg());
1102 
1103  int64_t Offset = FrameInfo.getObjectOffset(Index);
1104  if (Offset == 0) {
1105  // XXX - This never happens because of emergency scavenging slot at 0?
1106  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1107  .addImm(Log2_32(ST.getWavefrontSize()))
1108  .addReg(DiffReg);
1109  } else {
1110  unsigned ScaledReg
1111  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1112 
1113  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1114  .addImm(Log2_32(ST.getWavefrontSize()))
1115  .addReg(DiffReg, RegState::Kill);
1116 
1117  // TODO: Fold if use instruction is another add of a constant.
1118  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1119  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1120  .addImm(Offset)
1121  .addReg(ScaledReg, RegState::Kill)
1122  .addImm(0); // clamp bit
1123  } else {
1124  unsigned ConstOffsetReg
1125  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1126 
1127  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1128  .addImm(Offset);
1129  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1130  .addReg(ConstOffsetReg, RegState::Kill)
1131  .addReg(ScaledReg, RegState::Kill)
1132  .addImm(0); // clamp bit
1133  }
1134  }
1135 
1136  // Don't introduce an extra copy if we're just materializing in a mov.
1137  if (IsCopy)
1138  MI->eraseFromParent();
1139  else
1140  FIOp.ChangeToRegister(ResultReg, false, false, true);
1141  return;
1142  }
1143 
1144  if (IsMUBUF) {
1145  // Disable offen so we don't need a 0 vgpr base.
1146  assert(static_cast<int>(FIOperandNum) ==
1147  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1148  AMDGPU::OpName::vaddr));
1149 
1150  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1151  == MFI->getFrameOffsetReg());
1152 
1153  int64_t Offset = FrameInfo.getObjectOffset(Index);
1154  int64_t OldImm
1155  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1156  int64_t NewOffset = OldImm + Offset;
1157 
1158  if (isUInt<12>(NewOffset) &&
1159  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1160  MI->eraseFromParent();
1161  return;
1162  }
1163  }
1164 
1165  // If the offset is simply too big, don't convert to a scratch wave offset
1166  // relative index.
1167 
1168  int64_t Offset = FrameInfo.getObjectOffset(Index);
1169  FIOp.ChangeToImmediate(Offset);
1170  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1171  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1172  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1173  .addImm(Offset);
1174  FIOp.ChangeToRegister(TmpReg, false, false, true);
1175  }
1176  }
1177  }
1178 }
1179 
1180 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1181  #define AMDGPU_REG_ASM_NAMES
1182  #include "AMDGPURegAsmNames.inc.cpp"
1183 
1184  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1185  if (Reg >= BeginReg && Reg <= EndReg) { \
1186  unsigned Index = Reg - BeginReg; \
1187  assert(Index < array_lengthof(RegTable)); \
1188  return RegTable[Index]; \
1189  }
1190 
1191  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1192  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1193  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1194  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1195  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1196  VGPR96RegNames);
1197 
1198  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1199  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1200  VGPR128RegNames);
1201  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1202  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1203  SGPR128RegNames);
1204 
1205  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1206  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1207  VGPR256RegNames);
1208 
1209  REG_RANGE(
1210  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1211  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1212  VGPR512RegNames);
1213 
1214  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1215  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1216  SGPR256RegNames);
1217 
1218  REG_RANGE(
1219  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1220  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1221  SGPR512RegNames
1222  );
1223 
1224 #undef REG_RANGE
1225 
1226  // FIXME: Rename flat_scr so we don't need to special case this.
1227  switch (Reg) {
1228  case AMDGPU::FLAT_SCR:
1229  return "flat_scratch";
1230  case AMDGPU::FLAT_SCR_LO:
1231  return "flat_scratch_lo";
1232  case AMDGPU::FLAT_SCR_HI:
1233  return "flat_scratch_hi";
1234  default:
1235  // For the special named registers the default is fine.
1236  return AMDGPURegisterInfo::getRegAsmName(Reg);
1237  }
1238 }
1239 
1240 // FIXME: This is very slow. It might be worth creating a map from physreg to
1241 // register class.
1242 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1244 
1245  static const TargetRegisterClass *const BaseClasses[] = {
1246  &AMDGPU::VGPR_32RegClass,
1247  &AMDGPU::SReg_32RegClass,
1248  &AMDGPU::VReg_64RegClass,
1249  &AMDGPU::SReg_64RegClass,
1250  &AMDGPU::VReg_96RegClass,
1251  &AMDGPU::SReg_96RegClass,
1252  &AMDGPU::VReg_128RegClass,
1253  &AMDGPU::SReg_128RegClass,
1254  &AMDGPU::VReg_256RegClass,
1255  &AMDGPU::SReg_256RegClass,
1256  &AMDGPU::VReg_512RegClass,
1257  &AMDGPU::SReg_512RegClass,
1258  &AMDGPU::SCC_CLASSRegClass,
1259  &AMDGPU::Pseudo_SReg_32RegClass,
1260  &AMDGPU::Pseudo_SReg_128RegClass,
1261  };
1262 
1263  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1264  if (BaseClass->contains(Reg)) {
1265  return BaseClass;
1266  }
1267  }
1268  return nullptr;
1269 }
1270 
1271 // TODO: It might be helpful to have some target specific flags in
1272 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1273 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1274  unsigned Size = getRegSizeInBits(*RC);
1275  if (Size < 32)
1276  return false;
1277  switch (Size) {
1278  case 32:
1279  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1280  case 64:
1281  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1282  case 96:
1283  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1284  case 128:
1285  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1286  case 256:
1287  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1288  case 512:
1289  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1290  default:
1291  llvm_unreachable("Invalid register class size");
1292  }
1293 }
1294 
1295 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1296  const TargetRegisterClass *SRC) const {
1297  switch (getRegSizeInBits(*SRC)) {
1298  case 32:
1299  return &AMDGPU::VGPR_32RegClass;
1300  case 64:
1301  return &AMDGPU::VReg_64RegClass;
1302  case 96:
1303  return &AMDGPU::VReg_96RegClass;
1304  case 128:
1305  return &AMDGPU::VReg_128RegClass;
1306  case 256:
1307  return &AMDGPU::VReg_256RegClass;
1308  case 512:
1309  return &AMDGPU::VReg_512RegClass;
1310  default:
1311  llvm_unreachable("Invalid register class size");
1312  }
1313 }
1314 
1315 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1316  const TargetRegisterClass *VRC) const {
1317  switch (getRegSizeInBits(*VRC)) {
1318  case 32:
1319  return &AMDGPU::SGPR_32RegClass;
1320  case 64:
1321  return &AMDGPU::SReg_64RegClass;
1322  case 96:
1323  return &AMDGPU::SReg_96RegClass;
1324  case 128:
1325  return &AMDGPU::SReg_128RegClass;
1326  case 256:
1327  return &AMDGPU::SReg_256RegClass;
1328  case 512:
1329  return &AMDGPU::SReg_512RegClass;
1330  default:
1331  llvm_unreachable("Invalid register class size");
1332  }
1333 }
1334 
1335 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1336  const TargetRegisterClass *RC, unsigned SubIdx) const {
1337  if (SubIdx == AMDGPU::NoSubRegister)
1338  return RC;
1339 
1340  // We can assume that each lane corresponds to one 32-bit register.
1341  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1342  if (isSGPRClass(RC)) {
1343  switch (Count) {
1344  case 1:
1345  return &AMDGPU::SGPR_32RegClass;
1346  case 2:
1347  return &AMDGPU::SReg_64RegClass;
1348  case 3:
1349  return &AMDGPU::SReg_96RegClass;
1350  case 4:
1351  return &AMDGPU::SReg_128RegClass;
1352  case 8:
1353  return &AMDGPU::SReg_256RegClass;
1354  case 16: /* fall-through */
1355  default:
1356  llvm_unreachable("Invalid sub-register class size");
1357  }
1358  } else {
1359  switch (Count) {
1360  case 1:
1361  return &AMDGPU::VGPR_32RegClass;
1362  case 2:
1363  return &AMDGPU::VReg_64RegClass;
1364  case 3:
1365  return &AMDGPU::VReg_96RegClass;
1366  case 4:
1367  return &AMDGPU::VReg_128RegClass;
1368  case 8:
1369  return &AMDGPU::VReg_256RegClass;
1370  case 16: /* fall-through */
1371  default:
1372  llvm_unreachable("Invalid sub-register class size");
1373  }
1374  }
1375 }
1376 
1377 bool SIRegisterInfo::shouldRewriteCopySrc(
1378  const TargetRegisterClass *DefRC,
1379  unsigned DefSubReg,
1380  const TargetRegisterClass *SrcRC,
1381  unsigned SrcSubReg) const {
1382  // We want to prefer the smallest register class possible, so we don't want to
1383  // stop and rewrite on anything that looks like a subregister
1384  // extract. Operations mostly don't care about the super register class, so we
1385  // only want to stop on the most basic of copies between the same register
1386  // class.
1387  //
1388  // e.g. if we have something like
1389  // %0 = ...
1390  // %1 = ...
1391  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1392  // %3 = COPY %2, sub0
1393  //
1394  // We want to look through the COPY to find:
1395  // => %3 = COPY %0
1396 
1397  // Plain copy.
1398  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1399 }
1400 
1401 /// Returns a register that is not used at any point in the function.
1402 /// If all registers are used, then this function will return
1403 /// AMDGPU::NoRegister.
1404 unsigned
1405 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1406  const TargetRegisterClass *RC,
1407  const MachineFunction &MF) const {
1408 
1409  for (unsigned Reg : *RC)
1410  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1411  return Reg;
1412  return AMDGPU::NoRegister;
1413 }
1414 
1415 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1416  unsigned EltSize) const {
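 // Return the sub-register indices used to split RC into EltSize-byte pieces for spilling; an empty list means the register is handled as a single element.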
1417  if (EltSize == 4) {
1418  static const int16_t Sub0_15[] = {
1419  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1420  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1421  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1422  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1423  };
1424 
1425  static const int16_t Sub0_7[] = {
1426  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1427  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1428  };
1429 
1430  static const int16_t Sub0_3[] = {
1431  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1432  };
1433 
1434  static const int16_t Sub0_2[] = {
1435  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1436  };
1437 
1438  static const int16_t Sub0_1[] = {
1439  AMDGPU::sub0, AMDGPU::sub1,
1440  };
1441 
1442  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1443  case 32:
1444  return {};
1445  case 64:
1446  return makeArrayRef(Sub0_1);
1447  case 96:
1448  return makeArrayRef(Sub0_2);
1449  case 128:
1450  return makeArrayRef(Sub0_3);
1451  case 256:
1452  return makeArrayRef(Sub0_7);
1453  case 512:
1454  return makeArrayRef(Sub0_15);
1455  default:
1456  llvm_unreachable("unhandled register size");
1457  }
1458  }
1459 
1460  if (EltSize == 8) {
1461  static const int16_t Sub0_15_64[] = {
1462  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1463  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1464  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1465  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1466  };
1467 
1468  static const int16_t Sub0_7_64[] = {
1469  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1470  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1471  };
1472 
1473 
1474  static const int16_t Sub0_3_64[] = {
1475  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1476  };
1477 
1478  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1479  case 64:
1480  return {};
1481  case 128:
1482  return makeArrayRef(Sub0_3_64);
1483  case 256:
1484  return makeArrayRef(Sub0_7_64);
1485  case 512:
1486  return makeArrayRef(Sub0_15_64);
1487  default:
1488  llvm_unreachable("unhandled register size");
1489  }
1490  }
1491 
1492  assert(EltSize == 16 && "unhandled register spill split size");
1493 
1494  static const int16_t Sub0_15_128[] = {
1495  AMDGPU::sub0_sub1_sub2_sub3,
1496  AMDGPU::sub4_sub5_sub6_sub7,
1497  AMDGPU::sub8_sub9_sub10_sub11,
1498  AMDGPU::sub12_sub13_sub14_sub15
1499  };
1500 
1501  static const int16_t Sub0_7_128[] = {
1502  AMDGPU::sub0_sub1_sub2_sub3,
1503  AMDGPU::sub4_sub5_sub6_sub7
1504  };
1505 
1506  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1507  case 128:
1508  return {};
1509  case 256:
1510  return makeArrayRef(Sub0_7_128);
1511  case 512:
1512  return makeArrayRef(Sub0_15_128);
1513  default:
1514  llvm_unreachable("unhandled register size");
1515  }
1516 }
1517 
1518 const TargetRegisterClass*
1519 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1520  unsigned Reg) const {
1521  if (TargetRegisterInfo::isVirtualRegister(Reg))
1522  return MRI.getRegClass(Reg);
1523 
1524  return getPhysRegClass(Reg);
1525 }
1526 
1527 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1528  unsigned Reg) const {
1529  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1530  assert(RC && "Register class for the reg not found");
1531  return hasVGPRs(RC);
1532 }
1533 
1534 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1535  const TargetRegisterClass *SrcRC,
1536  unsigned SubReg,
1537  const TargetRegisterClass *DstRC,
1538  unsigned DstSubReg,
1539  const TargetRegisterClass *NewRC,
1540  LiveIntervals &LIS) const {
1541  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1542  unsigned DstSize = getRegSizeInBits(*DstRC);
1543  unsigned NewSize = getRegSizeInBits(*NewRC);
1544 
1545  // Do not increase size of registers beyond dword, we would need to allocate
1546  // adjacent registers and constrain regalloc more than needed.
1547 
1548  // Always allow dword coalescing.
1549  if (SrcSize <= 32 || DstSize <= 32)
1550  return true;
1551 
1552  return NewSize <= DstSize || NewSize <= SrcSize;
1553 }
1554 
1555 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1556  MachineFunction &MF) const {
1557 
1558  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1559  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1560 
1561  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1562  MF.getFunction());
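 // Cap the limit by both the occupancy achievable with the current LDS usage and the per-function register budget.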
1563  switch (RC->getID()) {
1564  default:
1565  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1566  case AMDGPU::VGPR_32RegClassID:
1567  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1568  case AMDGPU::SGPR_32RegClassID:
1569  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1570  }
1571 }
1572 
1573 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1574  unsigned Idx) const {
1575  if (Idx == getVGPRPressureSet())
1576  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1577  const_cast<MachineFunction &>(MF));
1578 
1579  if (Idx == getSGPRPressureSet())
1580  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1581  const_cast<MachineFunction &>(MF));
1582 
1583  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1584 }
1585 
1586 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1587  static const int Empty[] = { -1 };
1588 
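 // M0 does not participate in any pressure set; return an empty list for its register unit.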
1589  if (hasRegUnit(AMDGPU::M0, RegUnit))
1590  return Empty;
1591  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1592 }
1593 
1594 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1595  // Not a callee saved register.
1596  return AMDGPU::SGPR30_SGPR31;
1597 }
1598 
1599 const TargetRegisterClass *
1600 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1601  const MachineRegisterInfo &MRI) const {
1602  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1603  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1604  if (!RB)
1605  return nullptr;
1606 
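 // Round the size up to a power of two and map the operand's register bank and size to the narrowest matching class.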
1607  Size = PowerOf2Ceil(Size);
1608  switch (Size) {
1609  case 32:
1610  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1611  &AMDGPU::SReg_32_XM0RegClass;
1612  case 64:
1613  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1614  &AMDGPU::SReg_64_XEXECRegClass;
1615  case 96:
1616  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1617  &AMDGPU::SReg_96RegClass;
1618  case 128:
1619  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1620  &AMDGPU::SReg_128RegClass;
1621  case 256:
1622  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1623  &AMDGPU::SReg_256RegClass;
1624  case 512:
1625  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1626  &AMDGPU::SReg_512RegClass;
1627  default:
1628  llvm_unreachable("not implemented");
1629  }
1630 }
1631 
1632 // Find reaching register definition
1633 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1634  MachineInstr &Use,
1635  MachineRegisterInfo &MRI,
1636  LiveIntervals *LIS) const {
1637  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1638  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1639  SlotIndex DefIdx;
1640 
1641  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1642  if (!LIS->hasInterval(Reg))
1643  return nullptr;
1644  LiveInterval &LI = LIS->getInterval(Reg);
1645  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1646  : MRI.getMaxLaneMaskForVReg(Reg);
1647  VNInfo *V = nullptr;
1648  if (LI.hasSubRanges()) {
1649  for (auto &S : LI.subranges()) {
1650  if ((S.LaneMask & SubLanes) == SubLanes) {
1651  V = S.getVNInfoAt(UseIdx);
1652  break;
1653  }
1654  }
1655  } else {
1656  V = LI.getVNInfoAt(UseIdx);
1657  }
1658  if (!V)
1659  return nullptr;
1660  DefIdx = V->def;
1661  } else {
1662  // Find last def.
1663  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1664  LiveRange &LR = LIS->getRegUnit(*Units);
1665  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1666  if (!DefIdx.isValid() ||
1667  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1668  LIS->getInstructionFromIndex(V->def)))
1669  DefIdx = V->def;
1670  } else {
1671  return nullptr;
1672  }
1673  }
1674  }
1675 
1676  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
1677 
1678  if (!Def || !MDT.dominates(Def, &Use))
1679  return nullptr;
1680 
1681  assert(Def->modifiesRegister(Reg, this));
1682 
1683  return Def;
1684 }
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:722
unsigned SubReg
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:750
unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:418
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isSGPRPressureSet(unsigned SetID) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if this.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
#define REG_RANGE(BeginReg, EndReg, RegTable)
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:408
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
unsigned getReturnAddressReg(const MachineFunction &MF) const
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getSGPRPressureSet() const
const RegisterBank * getRegBankOrNull(unsigned Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
const MachineBasicBlock & front() const
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
LiveInterval & getInterval(unsigned Reg)
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:404
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool hasInv2PiInlineImm() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
void reserveRegisterTuples(BitVector &, unsigned Reg) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value,.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
uint32_t Size
Definition: Profile.cpp:46
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available and do the appropriate bookkeeping.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the &#39;base&#39; register class for this register.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:658
bool hasCalls() const
Return true if the current function has any function calls.