LLVM  9.0.0svn
AMDGPURegisterBankInfo.cpp
Go to the documentation of this file.
1 //===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPURegisterBankInfo.h"
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "SIRegisterInfo.h"
20 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/IR/Constants.h"
27 
28 #define GET_TARGET_REGBANK_IMPL
29 #include "AMDGPUGenRegisterBank.inc"
30 
31 // This file will be TableGen'ed at some point.
32 #include "AMDGPUGenRegisterBankInfo.def"
33 
34 using namespace llvm;
35 
38  TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
39 
40  // HACK: Until this is fully tablegen'd.
41  static bool AlreadyInit = false;
42  if (AlreadyInit)
43  return;
44 
45  AlreadyInit = true;
46 
47  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
48  (void)RBSGPR;
49  assert(&RBSGPR == &AMDGPU::SGPRRegBank);
50 
51  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
52  (void)RBVGPR;
53  assert(&RBVGPR == &AMDGPU::VGPRRegBank);
54 
55 }
56 
58  const RegisterBank &Src,
59  unsigned Size) const {
60  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
61  Src.getID() == AMDGPU::VGPRRegBankID) {
63  }
64 
65  // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
66  // the valu.
67  if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
68  (Src.getID() == AMDGPU::SGPRRegBankID ||
69  Src.getID() == AMDGPU::VGPRRegBankID ||
70  Src.getID() == AMDGPU::VCCRegBankID))
72 
73  if (Dst.getID() == AMDGPU::SCCRegBankID &&
74  Src.getID() == AMDGPU::VCCRegBankID)
76 
77  return RegisterBankInfo::copyCost(Dst, Src, Size);
78 }
79 
81  const ValueMapping &ValMapping,
82  const RegisterBank *CurBank) const {
83  assert(ValMapping.NumBreakDowns == 2 &&
84  ValMapping.BreakDown[0].Length == 32 &&
85  ValMapping.BreakDown[0].StartIdx == 0 &&
86  ValMapping.BreakDown[1].Length == 32 &&
87  ValMapping.BreakDown[1].StartIdx == 32 &&
88  ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);
89 
90  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
91  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
92  // want.
93 
94  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
95  // alignment restrictions, but this probably isn't important.
96  return 1;
97 }
98 
100  const TargetRegisterClass &RC) const {
101 
102  if (TRI->isSGPRClass(&RC))
103  return getRegBank(AMDGPU::SGPRRegBankID);
104 
105  return getRegBank(AMDGPU::VGPRRegBankID);
106 }
107 
108 template <unsigned NumOps>
110 AMDGPURegisterBankInfo::addMappingFromTable(
111  const MachineInstr &MI, const MachineRegisterInfo &MRI,
112  const std::array<unsigned, NumOps> RegSrcOpIdx,
113  ArrayRef<OpRegBankEntry<NumOps>> Table) const {
114 
115  InstructionMappings AltMappings;
116 
118 
119  unsigned Sizes[NumOps];
120  for (unsigned I = 0; I < NumOps; ++I) {
121  unsigned Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
122  Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
123  }
124 
125  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
126  unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
127  Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
128  }
129 
130  unsigned MappingID = 0;
131  for (const auto &Entry : Table) {
132  for (unsigned I = 0; I < NumOps; ++I) {
133  int OpIdx = RegSrcOpIdx[I];
134  Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
135  }
136 
137  AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
138  getOperandsMapping(Operands),
139  Operands.size()));
140  }
141 
142  return AltMappings;
143 }
144 
146 AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
147  const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
148 
149  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
150  case Intrinsic::amdgcn_buffer_load: {
151  static const OpRegBankEntry<3> Table[4] = {
152  // Perfectly legal.
153  { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
154  { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
155 
156  // Waterfall loop needed for rsrc. In the worst case this will execute
157  // approximately an extra 10 * wavesize + 2 instructions.
158  { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
159  { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
160  };
161 
162  // rsrc, voffset, offset
163  const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
164  return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
165  }
166  case Intrinsic::amdgcn_s_buffer_load: {
167  static const OpRegBankEntry<2> Table[4] = {
168  // Perfectly legal.
169  { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
170 
171  // Only need 1 register in loop
172  { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
173 
174  // Have to waterfall the resource.
175  { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
176 
177  // Have to waterfall the resource, and the offset.
178  { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
179  };
180 
181  // rsrc, offset
182  const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
183  return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
184  }
185  default:
187  }
188 }
189 
192  const MachineInstr &MI) const {
193 
194  const MachineFunction &MF = *MI.getParent()->getParent();
195  const MachineRegisterInfo &MRI = MF.getRegInfo();
196 
197 
198  InstructionMappings AltMappings;
199  switch (MI.getOpcode()) {
200  case TargetOpcode::G_AND:
201  case TargetOpcode::G_OR:
202  case TargetOpcode::G_XOR: {
203  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
204  if (Size != 64)
205  break;
206 
207  const InstructionMapping &SSMapping = getInstructionMapping(
208  1, 1, getOperandsMapping(
209  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
210  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
211  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
212  3); // Num Operands
213  AltMappings.push_back(&SSMapping);
214 
215  const InstructionMapping &VVMapping = getInstructionMapping(
216  2, 2, getOperandsMapping(
217  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
218  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
219  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
220  3); // Num Operands
221  AltMappings.push_back(&VVMapping);
222 
223  const InstructionMapping &SVMapping = getInstructionMapping(
224  3, 3, getOperandsMapping(
225  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
226  AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
227  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
228  3); // Num Operands
229  AltMappings.push_back(&SVMapping);
230 
231  // SGPR in LHS is slightly preferable, so make it VS more expensive than
232  // SV.
233  const InstructionMapping &VSMapping = getInstructionMapping(
234  3, 4, getOperandsMapping(
235  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
236  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
237  AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
238  3); // Num Operands
239  AltMappings.push_back(&VSMapping);
240  break;
241  }
242  case TargetOpcode::G_LOAD: {
243  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
244  // FIXME: Should we be hard coding the size for these mappings?
245  const InstructionMapping &SSMapping = getInstructionMapping(
246  1, 1, getOperandsMapping(
247  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
248  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
249  2); // Num Operands
250  AltMappings.push_back(&SSMapping);
251 
252  const InstructionMapping &VVMapping = getInstructionMapping(
253  2, 1, getOperandsMapping(
254  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
255  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
256  2); // Num Operands
257  AltMappings.push_back(&VVMapping);
258 
259  // FIXME: Should this be the pointer-size (64-bits) or the size of the
260  // register that will hold the buffer resource (128-bits)?
261  const InstructionMapping &VSMapping = getInstructionMapping(
262  3, 1, getOperandsMapping(
263  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
264  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
265  2); // Num Operands
266  AltMappings.push_back(&VSMapping);
267 
268  return AltMappings;
269 
270  }
271  case TargetOpcode::G_ICMP: {
272  unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
273  const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
274  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
275  nullptr, // Predicate operand.
276  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
277  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
278  4); // Num Operands
279  AltMappings.push_back(&SSMapping);
280 
281  const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
282  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
283  nullptr, // Predicate operand.
284  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
285  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
286  4); // Num Operands
287  AltMappings.push_back(&SVMapping);
288 
289  const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
290  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
291  nullptr, // Predicate operand.
292  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
293  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
294  4); // Num Operands
295  AltMappings.push_back(&VSMapping);
296 
297  const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
298  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
299  nullptr, // Predicate operand.
300  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
301  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
302  4); // Num Operands
303  AltMappings.push_back(&VVMapping);
304 
305  return AltMappings;
306  }
307  case TargetOpcode::G_SELECT: {
308  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
309  const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
310  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
311  AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
312  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
313  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
314  4); // Num Operands
315  AltMappings.push_back(&SSMapping);
316 
317  const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
318  getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
319  AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
320  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
321  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
322  4); // Num Operands
323  AltMappings.push_back(&VVMapping);
324 
325  return AltMappings;
326  }
327  case TargetOpcode::G_UADDE:
328  case TargetOpcode::G_USUBE:
329  case TargetOpcode::G_SADDE:
330  case TargetOpcode::G_SSUBE: {
331  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
332  const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
334  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
335  AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
336  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
337  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
338  AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
339  5); // Num Operands
340  AltMappings.push_back(&SSMapping);
341 
342  const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
343  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
344  AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
345  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
346  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
347  AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
348  5); // Num Operands
349  AltMappings.push_back(&VVMapping);
350  return AltMappings;
351  }
352  case AMDGPU::G_BRCOND: {
353  assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
354 
355  const InstructionMapping &SMapping = getInstructionMapping(
356  1, 1, getOperandsMapping(
357  {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
358  2); // Num Operands
359  AltMappings.push_back(&SMapping);
360 
361  const InstructionMapping &VMapping = getInstructionMapping(
362  1, 1, getOperandsMapping(
363  {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
364  2); // Num Operands
365  AltMappings.push_back(&VMapping);
366  return AltMappings;
367  }
368  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
369  return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
370  default:
371  break;
372  }
374 }
375 
376 void AMDGPURegisterBankInfo::split64BitValueForMapping(
379  LLT HalfTy,
380  unsigned Reg) const {
381  assert(HalfTy.getSizeInBits() == 32);
382  MachineRegisterInfo *MRI = B.getMRI();
383  unsigned LoLHS = MRI->createGenericVirtualRegister(HalfTy);
384  unsigned HiLHS = MRI->createGenericVirtualRegister(HalfTy);
385  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
386  MRI->setRegBank(LoLHS, *Bank);
387  MRI->setRegBank(HiLHS, *Bank);
388 
389  Regs.push_back(LoLHS);
390  Regs.push_back(HiLHS);
391 
392  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
393  .addDef(LoLHS)
394  .addDef(HiLHS)
395  .addUse(Reg);
396 }
397 
398 /// Replace the current type each register in \p Regs has with \p NewTy
400  LLT NewTy) {
401  for (unsigned Reg : Regs) {
402  assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
403  MRI.setType(Reg, NewTy);
404  }
405 }
406 
408  if (Ty.isVector()) {
409  assert(Ty.getNumElements() % 2 == 0);
410  return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
411  }
412 
413  assert(Ty.getSizeInBits() % 2 == 0);
414  return LLT::scalar(Ty.getSizeInBits() / 2);
415 }
416 
417 /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
418 /// any of the required SGPR operands are VGPRs, perform a waterfall loop to
419 /// execute the instruction for each unique combination of values in all lanes
420 /// in the wave. The block will be split such that rest of the instructions are
421 /// moved to a new block.
422 ///
423 /// Essentially performs this loop:
424 //
425 /// Save Execution Mask
426 /// For (Lane : Wavefront) {
427 /// Enable Lane, Disable all other lanes
428 /// SGPR = read SGPR value for current lane from VGPR
429 /// VGPRResult[Lane] = use_op SGPR
430 /// }
431 /// Restore Execution Mask
432 ///
433 /// There is additional complexity in comparing values to identify the
434 /// unique values used.
435 void AMDGPURegisterBankInfo::executeInWaterfallLoop(
437  ArrayRef<unsigned> OpIndices) const {
438  MachineFunction *MF = MI.getParent()->getParent();
439  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
440  const SIInstrInfo *TII = ST.getInstrInfo();
442 
443  MachineBasicBlock &MBB = *MI.getParent();
444  const DebugLoc &DL = MI.getDebugLoc();
445 
446  // Use a set to avoid extra readfirstlanes in the case where multiple operands
447  // are the same register.
448  SmallSet<unsigned, 4> SGPROperandRegs;
449  for (unsigned Op : OpIndices) {
450  assert(MI.getOperand(Op).isUse());
451  unsigned Reg = MI.getOperand(Op).getReg();
452  const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
453  if (OpBank->getID() == AMDGPU::VGPRRegBankID)
454  SGPROperandRegs.insert(Reg);
455  }
456 
457  // No operands need to be replaced, so no need to loop.
458  if (SGPROperandRegs.empty())
459  return;
460 
461  MachineIRBuilder B(MI);
462  SmallVector<unsigned, 4> ResultRegs;
463  SmallVector<unsigned, 4> InitResultRegs;
464  SmallVector<unsigned, 4> PhiRegs;
465  for (MachineOperand &Def : MI.defs()) {
466  LLT ResTy = MRI.getType(Def.getReg());
467  const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
468  ResultRegs.push_back(Def.getReg());
469  unsigned InitReg = B.buildUndef(ResTy).getReg(0);
470  unsigned PhiReg = MRI.createGenericVirtualRegister(ResTy);
471  InitResultRegs.push_back(InitReg);
472  PhiRegs.push_back(PhiReg);
473  MRI.setRegBank(PhiReg, *DefBank);
474  MRI.setRegBank(InitReg, *DefBank);
475  }
476 
477  unsigned SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
478  unsigned InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
479 
480  // Don't bother using generic instructions/registers for the exec mask.
481  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
482  .addDef(InitSaveExecReg);
483 
484  unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
485  unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
486 
487  // To insert the loop we need to split the block. Move everything before this
488  // point to a new block, and insert a new empty block before this instruction.
490  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
491  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
492  MachineFunction::iterator MBBI(MBB);
493  ++MBBI;
494  MF->insert(MBBI, LoopBB);
495  MF->insert(MBBI, RestoreExecBB);
496  MF->insert(MBBI, RemainderBB);
497 
498  LoopBB->addSuccessor(RestoreExecBB);
499  LoopBB->addSuccessor(LoopBB);
500 
501  // Move the rest of the block into a new block.
502  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
503  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
504 
505  MBB.addSuccessor(LoopBB);
506  RestoreExecBB->addSuccessor(RemainderBB);
507 
508  B.setInsertPt(*LoopBB, LoopBB->end());
509 
510  B.buildInstr(TargetOpcode::PHI)
511  .addDef(PhiExec)
512  .addReg(InitSaveExecReg)
513  .addMBB(&MBB)
514  .addReg(NewExec)
515  .addMBB(LoopBB);
516 
517  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
518  B.buildInstr(TargetOpcode::G_PHI)
519  .addDef(std::get<2>(Result))
520  .addReg(std::get<0>(Result)) // Initial value / implicit_def
521  .addMBB(&MBB)
522  .addReg(std::get<1>(Result)) // Mid-loop value.
523  .addMBB(LoopBB);
524  }
525 
526  // Move the instruction into the loop.
527  LoopBB->splice(LoopBB->end(), &MBB, I);
528  I = std::prev(LoopBB->end());
529 
530  B.setInstr(*I);
531 
532  unsigned CondReg = AMDGPU::NoRegister;
533 
534  for (MachineOperand &Op : MI.uses()) {
535  if (!Op.isReg())
536  continue;
537 
538  assert(!Op.isDef());
539  if (SGPROperandRegs.count(Op.getReg())) {
540  LLT OpTy = MRI.getType(Op.getReg());
541  unsigned OpSize = OpTy.getSizeInBits();
542 
543  // Can only do a readlane of 32-bit pieces.
544  if (OpSize == 32) {
545  // Avoid extra copies in the simple case of one 32-bit register.
546  unsigned CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
547  MRI.setType(CurrentLaneOpReg, OpTy);
548 
549  constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
550  // Read the next variant <- also loop target.
551  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg)
552  .addReg(Op.getReg());
553 
554  unsigned NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
555  bool First = CondReg == AMDGPU::NoRegister;
556  if (First)
557  CondReg = NewCondReg;
558 
559  // Compare the just read M0 value to all possible Idx values.
560  B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
561  .addDef(NewCondReg)
562  .addReg(CurrentLaneOpReg)
563  .addReg(Op.getReg());
564  Op.setReg(CurrentLaneOpReg);
565 
566  if (!First) {
567  unsigned AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
568 
569  // If there are multiple operands to consider, and the conditions.
570  B.buildInstr(AMDGPU::S_AND_B64)
571  .addDef(AndReg)
572  .addReg(NewCondReg)
573  .addReg(CondReg);
574  CondReg = AndReg;
575  }
576  } else {
577  LLT S32 = LLT::scalar(32);
578  SmallVector<unsigned, 8> ReadlanePieces;
579 
580  // The compares can be done as 64-bit, but the extract needs to be done
581  // in 32-bit pieces.
582 
583  bool Is64 = OpSize % 64 == 0;
584 
585  LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
586  unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
587  : AMDGPU::V_CMP_EQ_U32_e64;
588 
589  // The compares can be done as 64-bit, but the extract needs to be done
590  // in 32-bit pieces.
591 
592  // Insert the unmerge before the loop.
593 
594  B.setMBB(MBB);
595  auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
596  B.setInstr(*I);
597 
598  unsigned NumPieces = Unmerge->getNumOperands() - 1;
599  for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
600  unsigned UnmergePiece = Unmerge.getReg(PieceIdx);
601 
602  unsigned CurrentLaneOpReg;
603  if (Is64) {
604  unsigned CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
605  unsigned CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
606 
607  MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
608  MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
609  MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
610 
611  // Read the next variant <- also loop target.
612  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
613  CurrentLaneOpRegLo)
614  .addReg(UnmergePiece, 0, AMDGPU::sub0);
615 
616  // Read the next variant <- also loop target.
617  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
618  CurrentLaneOpRegHi)
619  .addReg(UnmergePiece, 0, AMDGPU::sub1);
620 
621  CurrentLaneOpReg =
622  B.buildMerge(LLT::scalar(64),
623  {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
624  .getReg(0);
625 
626  MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
627 
628  if (OpTy.getScalarSizeInBits() == 64) {
629  // If we need to produce a 64-bit element vector, so use the
630  // merged pieces
631  ReadlanePieces.push_back(CurrentLaneOpReg);
632  } else {
633  // 32-bit element type.
634  ReadlanePieces.push_back(CurrentLaneOpRegLo);
635  ReadlanePieces.push_back(CurrentLaneOpRegHi);
636  }
637  } else {
638  CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
639  MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
640  MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
641 
642  // Read the next variant <- also loop target.
643  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
644  CurrentLaneOpReg)
645  .addReg(UnmergePiece);
646  ReadlanePieces.push_back(CurrentLaneOpReg);
647  }
648 
649  unsigned NewCondReg
650  = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
651  bool First = CondReg == AMDGPU::NoRegister;
652  if (First)
653  CondReg = NewCondReg;
654 
655  B.buildInstr(CmpOp)
656  .addDef(NewCondReg)
657  .addReg(CurrentLaneOpReg)
658  .addReg(UnmergePiece);
659 
660  if (!First) {
661  unsigned AndReg
662  = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
663 
664  // If there are multiple operands to consider, and the conditions.
665  B.buildInstr(AMDGPU::S_AND_B64)
666  .addDef(AndReg)
667  .addReg(NewCondReg)
668  .addReg(CondReg);
669  CondReg = AndReg;
670  }
671  }
672 
673  // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
674  // BUILD_VECTOR
675  if (OpTy.isVector()) {
676  auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
677  Op.setReg(Merge.getReg(0));
678  } else {
679  auto Merge = B.buildMerge(OpTy, ReadlanePieces);
680  Op.setReg(Merge.getReg(0));
681  }
682 
683  MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
684  }
685  }
686  }
687 
688  B.setInsertPt(*LoopBB, LoopBB->end());
689 
690  // Update EXEC, save the original EXEC value to VCC.
691  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
692  .addDef(NewExec)
693  .addReg(CondReg, RegState::Kill);
694 
695  MRI.setSimpleHint(NewExec, CondReg);
696 
697  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
698  B.buildInstr(AMDGPU::S_XOR_B64_term)
699  .addDef(AMDGPU::EXEC)
700  .addReg(AMDGPU::EXEC)
701  .addReg(NewExec);
702 
703  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
704  // s_cbranch_scc0?
705 
706  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
707  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
708  .addMBB(LoopBB);
709 
710  // Save the EXEC mask before the loop.
711  BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
712  .addReg(AMDGPU::EXEC);
713 
714  // Restore the EXEC mask after the loop.
715  B.setMBB(*RestoreExecBB);
716  B.buildInstr(AMDGPU::S_MOV_B64_term)
717  .addDef(AMDGPU::EXEC)
718  .addReg(SaveExecReg);
719 }
720 
/// Target hook invoked after a register-bank mapping (possibly one of the
/// alternative mappings with value break-downs) has been selected for \p
/// OpdMapper's instruction. Rewrites the instruction in place when the
/// chosen mapping needs custom lowering:
///   * 64-bit G_SELECT / G_AND / G_OR / G_XOR whose operands were broken
///     into 32-bit VGPR halves are split into two 32-bit instructions;
///   * instructions with divergent operands that must be uniform
///     (G_EXTRACT_VECTOR_ELT index, buffer-load rsrc/offset) are wrapped in
///     a waterfall loop via executeInWaterfallLoop;
///   * everything else falls through to applyDefaultMapping.
721 void AMDGPURegisterBankInfo::applyMappingImpl(
722  const OperandsMapper &OpdMapper) const {
723  MachineInstr &MI = OpdMapper.getMI();
724  unsigned Opc = MI.getOpcode();
725  MachineRegisterInfo &MRI = OpdMapper.getMRI();
726  switch (Opc) {
  // 64-bit select with VGPR-mapped values: lower to two 32-bit selects that
  // share the single condition register.
727  case AMDGPU::G_SELECT: {
728  unsigned DstReg = MI.getOperand(0).getReg();
729  LLT DstTy = MRI.getType(DstReg);
730  if (DstTy.getSizeInBits() != 64)
731  break;
732 
733  LLT HalfTy = getHalfSizedType(DstTy);
734 
  // getVRegs returns the new half registers if the mapping broke the operand
  // down, or an empty range if the operand kept a single register.
735  SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
736  SmallVector<unsigned, 1> Src0Regs(OpdMapper.getVRegs(1));
737  SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));
738  SmallVector<unsigned, 2> Src2Regs(OpdMapper.getVRegs(3));
739 
740  // All inputs are SGPRs, nothing special to do.
741  if (DefRegs.empty()) {
742  assert(Src1Regs.empty() && Src2Regs.empty());
743  break;
744  }
745 
746  MachineIRBuilder B(MI);
  // The condition (operand 1) is never split; use the original register when
  // the mapping left it alone.
747  if (Src0Regs.empty())
748  Src0Regs.push_back(MI.getOperand(1).getReg());
749  else {
750  assert(Src0Regs.size() == 1);
751  }
752 
  // For each 64-bit value operand: if the mapping did not already break it
  // down, emit a G_UNMERGE_VALUES here; otherwise just retype the halves.
753  if (Src1Regs.empty())
754  split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
755  else {
756  setRegsToType(MRI, Src1Regs, HalfTy);
757  }
758 
759  if (Src2Regs.empty())
760  split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
761  else
762  setRegsToType(MRI, Src2Regs, HalfTy);
763 
764  setRegsToType(MRI, DefRegs, HalfTy);
765 
  // Two half-width selects, both keyed off the same condition.
766  B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
767  B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);
768 
769  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
770  MI.eraseFromParent();
771  return;
772  }
773  case AMDGPU::G_AND:
774  case AMDGPU::G_OR:
775  case AMDGPU::G_XOR: {
776  // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
777  // there is a VGPR input.
778  unsigned DstReg = MI.getOperand(0).getReg();
779  LLT DstTy = MRI.getType(DstReg);
780  if (DstTy.getSizeInBits() != 64)
781  break;
782 
783  LLT HalfTy = getHalfSizedType(DstTy);
784  SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
785  SmallVector<unsigned, 2> Src0Regs(OpdMapper.getVRegs(1));
786  SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));
787 
788  // All inputs are SGPRs, nothing special to do.
789  if (DefRegs.empty()) {
790  assert(Src0Regs.empty() && Src1Regs.empty());
791  break;
792  }
793 
794  assert(DefRegs.size() == 2);
795  assert(Src0Regs.size() == Src1Regs.size() &&
796  (Src0Regs.empty() || Src0Regs.size() == 2));
797 
798  // Depending on where the source registers came from, the generic code may
799  // have decided to split the inputs already or not. If not, we still need to
800  // extract the values.
801  MachineIRBuilder B(MI);
802 
803  if (Src0Regs.empty())
804  split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
805  else
806  setRegsToType(MRI, Src0Regs, HalfTy);
807 
808  if (Src1Regs.empty())
809  split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
810  else
811  setRegsToType(MRI, Src1Regs, HalfTy);
812 
813  setRegsToType(MRI, DefRegs, HalfTy);
814 
  // Rebuild the same logical opcode on each 32-bit half.
815  B.buildInstr(Opc)
816  .addDef(DefRegs[0])
817  .addUse(Src0Regs[0])
818  .addUse(Src1Regs[0]);
819 
820  B.buildInstr(Opc)
821  .addDef(DefRegs[1])
822  .addUse(Src0Regs[1])
823  .addUse(Src1Regs[1]);
824 
825  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
826  MI.eraseFromParent();
827  return;
828  }
  // The vector-element index (operand 2) must be an SGPR; waterfall if the
  // chosen mapping left it in a VGPR (see executeInWaterfallLoop above).
829  case AMDGPU::G_EXTRACT_VECTOR_ELT:
830  applyDefaultMapping(OpdMapper);
831  executeInWaterfallLoop(MI, MRI, { 2 });
832  return;
833  case AMDGPU::G_INTRINSIC: {
834  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
835  case Intrinsic::amdgcn_s_buffer_load: {
836  // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
  // Operands 2 and 3 (rsrc, offset) must be uniform.
837  executeInWaterfallLoop(MI, MRI, { 2, 3 });
838  return;
839  }
840  default:
841  break;
842  }
843  break;
844  }
845  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
846  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
847  case Intrinsic::amdgcn_buffer_load: {
  // Operand 2 (rsrc) must be uniform.
848  executeInWaterfallLoop(MI, MRI, { 2 });
849  return;
850  }
851  default:
852  break;
853  }
854  break;
855  }
856  default:
857  break;
858  }
859 
  // No custom lowering needed; let the generic code repair register banks.
860  return applyDefaultMapping(OpdMapper);
861 }
862 
863 static bool isInstrUniform(const MachineInstr &MI) {
864  if (!MI.hasOneMemOperand())
865  return false;
866 
867  const MachineMemOperand *MMO = *MI.memoperands_begin();
868  return AMDGPUInstrInfo::isUniformMMO(MMO);
869 }
870 
871 bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
872  const MachineFunction &MF = *MI.getParent()->getParent();
873  const MachineRegisterInfo &MRI = MF.getRegInfo();
874  for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
875  if (!MI.getOperand(i).isReg())
876  continue;
877  unsigned Reg = MI.getOperand(i).getReg();
878  if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
879  if (Bank->getID() == AMDGPU::VGPRRegBankID)
880  return false;
881 
882  assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
883  Bank->getID() == AMDGPU::SCCRegBankID);
884  }
885  }
886  return true;
887 }
888 
890 AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
891  const MachineFunction &MF = *MI.getParent()->getParent();
892  const MachineRegisterInfo &MRI = MF.getRegInfo();
894 
895  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
896  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
897  unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
898  OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
899  }
900  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
901  MI.getNumOperands());
902 }
903 
905 AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
906  const MachineFunction &MF = *MI.getParent()->getParent();
907  const MachineRegisterInfo &MRI = MF.getRegInfo();
909  unsigned OpdIdx = 0;
910 
911  unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
912  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
913 
914  if (MI.getOperand(OpdIdx).isIntrinsicID())
915  OpdsMapping[OpdIdx++] = nullptr;
916 
917  unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
918  unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
919 
920  unsigned DefaultBankID = Size1 == 1 ?
921  AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
922  unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
923 
924  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
925 
926  for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
927  const MachineOperand &MO = MI.getOperand(OpdIdx);
928  if (!MO.isReg())
929  continue;
930 
931  unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
932  unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
933  OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
934  }
935 
936  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
937  MI.getNumOperands());
938 }
939 
941 AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
942  const MachineFunction &MF = *MI.getParent()->getParent();
943  const MachineRegisterInfo &MRI = MF.getRegInfo();
945 
946  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
947  unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
948  OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
949  }
950 
951  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
952  MI.getNumOperands());
953 }
954 
956 AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
957 
958  const MachineFunction &MF = *MI.getParent()->getParent();
959  const MachineRegisterInfo &MRI = MF.getRegInfo();
961  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
962  unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
963 
964  const ValueMapping *ValMapping;
965  const ValueMapping *PtrMapping;
966 
967  if (isInstrUniform(MI)) {
968  // We have a uniform instruction so we want to use an SMRD load
969  ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
970  PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
971  } else {
972  ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
973  // FIXME: What would happen if we used SGPRRegBankID here?
974  PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
975  }
976 
977  OpdsMapping[0] = ValMapping;
978  OpdsMapping[1] = PtrMapping;
980  1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
981  return Mapping;
982 
983  // FIXME: Do we want to add a mapping for FLAT load, or should we just
984  // handle that during instruction selection?
985 }
986 
987 unsigned
988 AMDGPURegisterBankInfo::getRegBankID(unsigned Reg,
989  const MachineRegisterInfo &MRI,
990  const TargetRegisterInfo &TRI,
991  unsigned Default) const {
992 
993  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
994  return Bank ? Bank->getID() : Default;
995 }
996 
997 ///
998 /// This function must return a legal mapping, because
999 /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
1000 /// in RegBankSelect::Mode::Fast. Any mapping that would cause a
1001 /// VGPR to SGPR copy to be generated is illegal.
1002 ///
1005  const MachineFunction &MF = *MI.getParent()->getParent();
1006  const MachineRegisterInfo &MRI = MF.getRegInfo();
1007 
1008  if (MI.isRegSequence()) {
1009  // If any input is a VGPR, the result must be a VGPR. The default handling
1010  // assumes any copy between banks is legal.
1011  unsigned BankID = AMDGPU::SGPRRegBankID;
1012 
1013  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
1014  auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
1015  // It doesn't make sense to use vcc or scc banks here, so just ignore
1016  // them.
1017  if (OpBank != AMDGPU::SGPRRegBankID) {
1018  BankID = AMDGPU::VGPRRegBankID;
1019  break;
1020  }
1021  }
1022  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1023 
1024  const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
1025  return getInstructionMapping(
1026  1, /*Cost*/ 1,
1027  /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
1028  }
1029 
1030  // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
1031  // properly.
1032  //
1033  // TODO: There are additional exec masking dependencies to analyze.
1034  if (MI.getOpcode() == TargetOpcode::G_PHI) {
1035  // TODO: Generate proper invalid bank enum.
1036  int ResultBank = -1;
1037 
1038  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
1039  unsigned Reg = MI.getOperand(I).getReg();
1040  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
1041 
1042  // FIXME: Assuming VGPR for any undetermined inputs.
1043  if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
1044  ResultBank = AMDGPU::VGPRRegBankID;
1045  break;
1046  }
1047 
1048  unsigned OpBank = Bank->getID();
1049  // scc, scc -> sgpr
1050  if (OpBank == AMDGPU::SCCRegBankID) {
1051  // There's only one SCC register, so a phi requires copying to SGPR.
1052  OpBank = AMDGPU::SGPRRegBankID;
1053  } else if (OpBank == AMDGPU::VCCRegBankID) {
1054  // vcc, vcc -> vcc
1055  // vcc, sgpr -> vgpr
1056  if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
1057  ResultBank = AMDGPU::VGPRRegBankID;
1058  break;
1059  }
1060  }
1061 
1062  ResultBank = OpBank;
1063  }
1064 
1065  assert(ResultBank != -1);
1066 
1067  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1068 
1069  const ValueMapping &ValMap =
1070  getValueMapping(0, Size, getRegBank(ResultBank));
1071  return getInstructionMapping(
1072  1, /*Cost*/ 1,
1073  /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
1074  }
1075 
1077  if (Mapping.isValid())
1078  return Mapping;
1079 
1081 
1082  switch (MI.getOpcode()) {
1083  default:
1085 
1086  case AMDGPU::G_AND:
1087  case AMDGPU::G_OR:
1088  case AMDGPU::G_XOR: {
1089  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1090  if (Size == 1) {
1091  OpdsMapping[0] = OpdsMapping[1] =
1092  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
1093  break;
1094  }
1095 
1096  if (Size == 64) {
1097 
1098  if (isSALUMapping(MI)) {
1099  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
1100  OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
1101  } else {
1102  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
1103  unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
1104  OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
1105 
1106  unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
1107  OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
1108  }
1109 
1110  break;
1111  }
1112 
1114  }
1115 
1116  case AMDGPU::G_GEP:
1117  case AMDGPU::G_ADD:
1118  case AMDGPU::G_SUB:
1119  case AMDGPU::G_MUL:
1120  case AMDGPU::G_SHL:
1121  case AMDGPU::G_LSHR:
1122  case AMDGPU::G_ASHR:
1123  case AMDGPU::G_UADDO:
1124  case AMDGPU::G_SADDO:
1125  case AMDGPU::G_USUBO:
1126  case AMDGPU::G_SSUBO:
1127  case AMDGPU::G_UADDE:
1128  case AMDGPU::G_SADDE:
1129  case AMDGPU::G_USUBE:
1130  case AMDGPU::G_SSUBE:
1131  case AMDGPU::G_UMULH:
1132  case AMDGPU::G_SMULH:
1133  if (isSALUMapping(MI))
1134  return getDefaultMappingSOP(MI);
1136 
1137  case AMDGPU::G_SMIN:
1138  case AMDGPU::G_SMAX:
1139  case AMDGPU::G_UMIN:
1140  case AMDGPU::G_UMAX:
1141  // TODO: min/max can be scalar, but requires expanding as a compare and
1142  // select.
1143 
1144  case AMDGPU::G_FADD:
1145  case AMDGPU::G_FSUB:
1146  case AMDGPU::G_FPTOSI:
1147  case AMDGPU::G_FPTOUI:
1148  case AMDGPU::G_FMUL:
1149  case AMDGPU::G_FMA:
1150  case AMDGPU::G_FSQRT:
1151  case AMDGPU::G_SITOFP:
1152  case AMDGPU::G_UITOFP:
1153  case AMDGPU::G_FPTRUNC:
1154  case AMDGPU::G_FPEXT:
1155  case AMDGPU::G_FEXP2:
1156  case AMDGPU::G_FLOG2:
1157  case AMDGPU::G_INTRINSIC_TRUNC:
1158  case AMDGPU::G_INTRINSIC_ROUND:
1159  return getDefaultMappingVOP(MI);
1160  case AMDGPU::G_IMPLICIT_DEF: {
1161  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1162  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1163  break;
1164  }
1165  case AMDGPU::G_FCONSTANT:
1166  case AMDGPU::G_CONSTANT:
1167  case AMDGPU::G_FRAME_INDEX:
1168  case AMDGPU::G_BLOCK_ADDR: {
1169  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1170  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1171  break;
1172  }
1173  case AMDGPU::G_INSERT: {
1174  unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
1175  AMDGPU::VGPRRegBankID;
1176  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1177  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1178  unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
1179  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1180  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1181  OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
1182  OpdsMapping[3] = nullptr;
1183  break;
1184  }
1185  case AMDGPU::G_EXTRACT: {
1186  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1187  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1188  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1189  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1190  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1191  OpdsMapping[2] = nullptr;
1192  break;
1193  }
1194  case AMDGPU::G_MERGE_VALUES: {
1195  unsigned Bank = isSALUMapping(MI) ?
1196  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1197  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1198  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1199 
1200  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1201  // Op1 and Dst should use the same register bank.
1202  for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
1203  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
1204  break;
1205  }
1206  case AMDGPU::G_BITCAST:
1207  case AMDGPU::G_INTTOPTR:
1208  case AMDGPU::G_PTRTOINT:
1209  case AMDGPU::G_CTLZ:
1210  case AMDGPU::G_CTLZ_ZERO_UNDEF:
1211  case AMDGPU::G_CTTZ:
1212  case AMDGPU::G_CTTZ_ZERO_UNDEF:
1213  case AMDGPU::G_CTPOP:
1214  case AMDGPU::G_BSWAP:
1215  case AMDGPU::G_FABS:
1216  case AMDGPU::G_FNEG: {
1217  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1218  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1219  OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
1220  break;
1221  }
1222  case AMDGPU::G_TRUNC: {
1223  unsigned Dst = MI.getOperand(0).getReg();
1224  unsigned Src = MI.getOperand(1).getReg();
1225  unsigned Bank = getRegBankID(Src, MRI, *TRI);
1226  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1227  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1228  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1229  OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
1230  break;
1231  }
1232  case AMDGPU::G_ZEXT:
1233  case AMDGPU::G_SEXT:
1234  case AMDGPU::G_ANYEXT: {
1235  unsigned Dst = MI.getOperand(0).getReg();
1236  unsigned Src = MI.getOperand(1).getReg();
1237  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1238  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1239  unsigned SrcBank = getRegBankID(Src, MRI, *TRI,
1240  SrcSize == 1 ? AMDGPU::SGPRRegBankID :
1241  AMDGPU::VGPRRegBankID);
1242  unsigned DstBank = SrcBank;
1243  if (SrcSize == 1) {
1244  if (SrcBank == AMDGPU::SGPRRegBankID)
1245  DstBank = AMDGPU::VGPRRegBankID;
1246  else
1247  DstBank = AMDGPU::SGPRRegBankID;
1248  }
1249 
1250  OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
1251  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank, SrcSize);
1252  break;
1253  }
1254  case AMDGPU::G_FCMP: {
1255  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1256  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1257  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
1258  OpdsMapping[1] = nullptr; // Predicate Operand.
1259  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1260  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1261  break;
1262  }
1263  case AMDGPU::G_STORE: {
1264  assert(MI.getOperand(0).isReg());
1265  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1266  // FIXME: We need to specify a different reg bank once scalar stores
1267  // are supported.
1268  const ValueMapping *ValMapping =
1269  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1270  // FIXME: Depending on the type of store, the pointer could be in
1271  // the SGPR Reg bank.
1272  // FIXME: Pointer size should be based on the address space.
1273  const ValueMapping *PtrMapping =
1274  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
1275 
1276  OpdsMapping[0] = ValMapping;
1277  OpdsMapping[1] = PtrMapping;
1278  break;
1279  }
1280 
1281  case AMDGPU::G_ICMP: {
1282  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1283  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1284  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1285  unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
1286  Op3Bank == AMDGPU::SGPRRegBankID ?
1287  AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
1288  OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
1289  OpdsMapping[1] = nullptr; // Predicate Operand.
1290  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1291  OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
1292  break;
1293  }
1294 
1295 
1296  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
1297  unsigned OutputBankID = isSALUMapping(MI) ?
1298  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1299  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1300  unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1301  unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1302 
1303  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1304  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1305 
1306  // The index can be either if the source vector is VGPR.
1307  OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1308  break;
1309  }
1310  case AMDGPU::G_INSERT_VECTOR_ELT: {
1311  unsigned OutputBankID = isSALUMapping(MI) ?
1312  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1313 
1314  unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1315  unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1316  unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
1317  unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1318  unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1319 
1320  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1321  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1322  OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
1323 
1324  // The index can be either if the source vector is VGPR.
1325  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1326  break;
1327  }
1328  case AMDGPU::G_UNMERGE_VALUES: {
1329  unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
1330  AMDGPU::VGPRRegBankID;
1331 
1332  // Op1 and Dst should use the same register bank.
1333  // FIXME: Shouldn't this be the default? Why do we need to handle this?
1334  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1335  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
1336  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
1337  }
1338  break;
1339  }
1340  case AMDGPU::G_INTRINSIC: {
1341  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1342  default:
1344  case Intrinsic::maxnum:
1345  case Intrinsic::minnum:
1346  case Intrinsic::amdgcn_cvt_pkrtz:
1347  return getDefaultMappingVOP(MI);
1348  case Intrinsic::amdgcn_kernarg_segment_ptr: {
1349  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1350  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1351  break;
1352  }
1353  case Intrinsic::amdgcn_wqm_vote: {
1354  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1355  OpdsMapping[0] = OpdsMapping[2]
1356  = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1357  break;
1358  }
1359  case Intrinsic::amdgcn_s_buffer_load: {
1360  // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
1361  unsigned RSrc = MI.getOperand(2).getReg(); // SGPR
1362  unsigned Offset = MI.getOperand(3).getReg(); // SGPR/imm
1363 
1364  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1365  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
1366  unsigned Size3 = MRI.getType(Offset).getSizeInBits();
1367 
1368  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
1369  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
1370 
1371  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
1372  OpdsMapping[1] = nullptr; // intrinsic id
1373 
1374  // Lie and claim everything is legal, even though some need to be
1375  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
1376  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
1377  OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
1378  OpdsMapping[4] = nullptr;
1379  break;
1380  }
1381  }
1382  break;
1383  }
1384  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
1385  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1386  default:
1388  case Intrinsic::amdgcn_exp_compr:
1389  OpdsMapping[0] = nullptr; // IntrinsicID
1390  // FIXME: These are immediate values which can't be read from registers.
1391  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1392  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1393  // FIXME: Could we support packed types here?
1394  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1395  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1396  // FIXME: These are immediate values which can't be read from registers.
1397  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1398  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1399  break;
1400  case Intrinsic::amdgcn_exp:
1401  OpdsMapping[0] = nullptr; // IntrinsicID
1402  // FIXME: These are immediate values which can't be read from registers.
1403  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1404  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1405  // FIXME: Could we support packed types here?
1406  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1407  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1408  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1409  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1410  // FIXME: These are immediate values which can't be read from registers.
1411  OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1412  OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1413  break;
1414  case Intrinsic::amdgcn_buffer_load: {
1415  unsigned RSrc = MI.getOperand(2).getReg(); // SGPR
1416  unsigned VIndex = MI.getOperand(3).getReg(); // VGPR
1417  unsigned Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
1418 
1419  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1420  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
1421  unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
1422  unsigned Size4 = MRI.getType(Offset).getSizeInBits();
1423 
1424  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
1425  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
1426 
1427  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
1428  OpdsMapping[1] = nullptr; // intrinsic id
1429 
1430  // Lie and claim everything is legal, even though some need to be
1431  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
1432  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
1433  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
1434  OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
1435  OpdsMapping[5] = nullptr;
1436  OpdsMapping[6] = nullptr;
1437  break;
1438  }
1439  }
1440 
1441  break;
1442  }
1443  case AMDGPU::G_SELECT: {
1444  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1445  unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1446  AMDGPU::SGPRRegBankID);
1447  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1448  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1449  bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
1450  Op2Bank == AMDGPU::SGPRRegBankID &&
1451  Op3Bank == AMDGPU::SGPRRegBankID;
1452  unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1453  Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
1454 
1455  if (Size == 64) {
1456  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
1457  OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
1458  OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
1459  OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
1460  } else {
1461  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
1462  OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
1463  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
1464  OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
1465  }
1466 
1467  break;
1468  }
1469 
1470  case AMDGPU::G_LOAD:
1471  return getInstrMappingForLoad(MI);
1472 
1473  case AMDGPU::G_ATOMICRMW_XCHG:
1474  case AMDGPU::G_ATOMICRMW_ADD:
1475  case AMDGPU::G_ATOMICRMW_SUB:
1476  case AMDGPU::G_ATOMICRMW_AND:
1477  case AMDGPU::G_ATOMICRMW_OR:
1478  case AMDGPU::G_ATOMICRMW_XOR:
1479  case AMDGPU::G_ATOMICRMW_MAX:
1480  case AMDGPU::G_ATOMICRMW_MIN:
1481  case AMDGPU::G_ATOMICRMW_UMAX:
1482  case AMDGPU::G_ATOMICRMW_UMIN:
1483  case AMDGPU::G_ATOMIC_CMPXCHG: {
1484  return getDefaultMappingAllVGPR(MI);
1485  }
1486  case AMDGPU::G_BRCOND: {
1487  unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
1488  AMDGPU::SGPRRegBankID);
1489  assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
1490  if (Bank != AMDGPU::SCCRegBankID)
1491  Bank = AMDGPU::VCCRegBankID;
1492 
1493  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
1494  break;
1495  }
1496  }
1497 
1498  return getInstructionMapping(/*ID*/1, /*Cost*/1,
1499  getOperandsMapping(OpdsMapping),
1500  MI.getNumOperands());
1501 }
1502 
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getReg(unsigned Idx) const
Get the register for the operand index.
Interface definition for SIRegisterInfo.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:493
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
unsigned getScalarSizeInBits() const
Helper class that represents how the value of an instruction may be mapped and what is the related co...
void push_back(const T &Elt)
Definition: SmallVector.h:211
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:384
unsigned getReg() const
getReg - Returns the register number.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
bool isRegSequence() const
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
void setRegBank(unsigned Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
bool isIntrinsicID() const
bool isVector() const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
iterator_range< SmallVectorImpl< unsigned >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
A description of a memory reference used in the backend.
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:413
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
bool isSGPRClass(const TargetRegisterClass *RC) const
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const RegisterBank * RegBank
Register bank where the partial value lives.
static bool isInstrUniform(const MachineInstr &MI)
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< unsigned > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
MachineRegisterInfo * getMRI()
Getter for MRI.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&... args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:667
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Helper class to build MachineInstr.
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned Length
Length of this mapping in bits.
void setType(unsigned VReg, LLT Ty)
Set the low-level type of VReg to Ty.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:550
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:482
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
R600 Clause Merge
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1248
bool isValid() const
Check whether this object is valid.
unsigned createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
static unsigned getIntrinsicID(const SDNode *N)
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:535
static LLT getHalfSizedType(LLT Ty)
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Helper struct that represents how a value is mapped through different register banks.
This file declares the MachineIRBuilder class.
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override
Get a register bank that covers RC.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
void setMBB(MachineBasicBlock &MBB)
Set the insertion point to the end of MBB.
#define I(x, y, z)
Definition: MD5.cpp:58
static const TargetRegisterClass * constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
uint32_t Size
Definition: Profile.cpp:46
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void setSimpleHint(unsigned VReg, unsigned PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
This class provides the information for the target register banks.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr & getMI() const
Helper struct that represents how a value is mapped through different register banks.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
aarch64 promote const
unsigned NumBreakDowns
Number of partial mapping to break down this value.
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
IRTranslator LLVM IR MI
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1237
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164