LLVM  9.0.0svn
AMDGPURegisterBankInfo.cpp
Go to the documentation of this file.
1 //===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPURegisterBankInfo.h"
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "SIRegisterInfo.h"
20 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/IR/Constants.h"
27 
28 #define GET_TARGET_REGBANK_IMPL
29 #include "AMDGPUGenRegisterBank.inc"
30 
31 // This file will be TableGen'ed at some point.
32 #include "AMDGPUGenRegisterBankInfo.def"
33 
34 using namespace llvm;
35 
// NOTE(review): constructor body for AMDGPURegisterBankInfo. The signature
// line(s) (original lines 36-37) are missing from this extracted listing;
// only the trailing member-initializer for TRI is visible.
38  TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
39 
40  // HACK: Until this is fully tablegen'd.
// Run the bank-ID sanity checks below only once per process, even though a
// constructor instance may be created per subtarget.
41  static bool AlreadyInit = false;
42  if (AlreadyInit)
43  return;
44 
45  AlreadyInit = true;
46 
// Verify the generated SGPR bank ID resolves to the expected bank singleton.
// The (void) cast keeps the variable "used" in NDEBUG builds.
47  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
48  (void)RBSGPR;
49  assert(&RBSGPR == &AMDGPU::SGPRRegBank);
50 
// Same check for the VGPR bank.
51  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
52  (void)RBVGPR;
53  assert(&RBVGPR == &AMDGPU::VGPRRegBank);
54 
55 }
56 
// NOTE(review): copyCost override. This listing is missing the signature
// start (original line 57) and the return statements at original lines 62,
// 71 and 75 — the actual costs for the three guarded cases below were
// dropped by the extraction; confirm against the upstream source.
58  const RegisterBank &Src,
59  unsigned Size) const {
// VGPR -> SGPR copies cannot be expressed as a plain copy (they need a
// readfirstlane/waterfall); the missing return here presumably reports a
// very high cost — TODO confirm.
60  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
61  Src.getID() == AMDGPU::VGPRRegBankID) {
63  }
64 
65  // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
66  // the valu.
67  if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
68  (Src.getID() == AMDGPU::SGPRRegBankID ||
69  Src.getID() == AMDGPU::VGPRRegBankID ||
70  Src.getID() == AMDGPU::VCCRegBankID))
72 
73  if (Dst.getID() == AMDGPU::SCCRegBankID &&
74  Src.getID() == AMDGPU::VCCRegBankID)
76 
// Everything else falls through to the generic cost model.
77  return RegisterBankInfo::copyCost(Dst, Src, Size);
78 }
79 
// NOTE(review): getBreakDownCost — the signature start (original line 80)
// is missing from this listing. The assert shows this only handles the
// 64-bit value split into two adjacent 32-bit pieces in the same bank.
81  const ValueMapping &ValMapping,
82  const RegisterBank *CurBank) const {
83  assert(ValMapping.NumBreakDowns == 2 &&
84  ValMapping.BreakDown[0].Length == 32 &&
85  ValMapping.BreakDown[0].StartIdx == 0 &&
86  ValMapping.BreakDown[1].Length == 32 &&
87  ValMapping.BreakDown[1].StartIdx == 32 &&
88  ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);
89 
90  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
91  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
92  // want.
93 
94  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
95  // alignment restrictions, but this probably isn't important.
96  return 1;
97 }
98 
// NOTE(review): getRegBankFromRegClass — the signature start (original line
// 99) is missing from this listing.
100  const TargetRegisterClass &RC) const {
101 
// Any SGPR register class maps to the SGPR bank; every other class is
// treated as VGPR.
102  if (TRI->isSGPRClass(&RC))
103  return getRegBank(AMDGPU::SGPRRegBankID);
104 
105  return getRegBank(AMDGPU::VGPRRegBankID);
106 }
107 
// NOTE(review): getInstrAlternativeMappings — enumerates the legal
// bank-assignment alternatives (with relative costs) for a handful of
// generic opcodes. This listing is missing the signature start (original
// lines 108-109), the getOperandsMapping( call at original line 251, and
// the default fall-through return at original line 289 (presumably
// delegating to RegisterBankInfo::getInstrAlternativeMappings).
110  const MachineInstr &MI) const {
111 
112  const MachineFunction &MF = *MI.getParent()->getParent();
113  const MachineRegisterInfo &MRI = MF.getRegInfo();
114 
115 
116  InstructionMappings AltMappings;
117  switch (MI.getOpcode()) {
// 64-bit logical ops: all-SGPR (cheapest), all-VGPR, and the two mixed
// forms. The VGPR variants use the SGPR64Only breakdown since the VALU has
// no 64-bit and/or/xor.
118  case TargetOpcode::G_AND:
119  case TargetOpcode::G_OR:
120  case TargetOpcode::G_XOR: {
121  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
122  if (Size != 64)
123  break;
124 
125  const InstructionMapping &SSMapping = getInstructionMapping(
126  1, 1, getOperandsMapping(
127  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
128  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
129  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
130  3); // Num Operands
131  AltMappings.push_back(&SSMapping);
132 
133  const InstructionMapping &VVMapping = getInstructionMapping(
134  2, 2, getOperandsMapping(
135  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
136  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
137  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
138  3); // Num Operands
139  AltMappings.push_back(&VVMapping);
140 
141  const InstructionMapping &SVMapping = getInstructionMapping(
142  3, 3, getOperandsMapping(
143  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
144  AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
145  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
146  3); // Num Operands
147  AltMappings.push_back(&SVMapping);
148 
149  // SGPR in LHS is slightly preferable, so make VS more expensive than
150  // SV.
151  const InstructionMapping &VSMapping = getInstructionMapping(
152  3, 4, getOperandsMapping(
153  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
154  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
155  AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
156  3); // Num Operands
157  AltMappings.push_back(&VSMapping);
158  break;
159  }
// Loads: scalar (SMRD-style) result+pointer, fully-VGPR, and VGPR result
// with scalar pointer. Pointer size is hard-coded to 64 bits here.
160  case TargetOpcode::G_LOAD: {
161  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
162  // FIXME: Should we be hard coding the size for these mappings?
163  const InstructionMapping &SSMapping = getInstructionMapping(
164  1, 1, getOperandsMapping(
165  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
166  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
167  2); // Num Operands
168  AltMappings.push_back(&SSMapping);
169 
170  const InstructionMapping &VVMapping = getInstructionMapping(
171  2, 1, getOperandsMapping(
172  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
173  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
174  2); // Num Operands
175  AltMappings.push_back(&VVMapping);
176 
177  // FIXME: Should this be the pointer-size (64-bits) or the size of the
178  // register that will hold the buffer resource (128-bits).
179  const InstructionMapping &VSMapping = getInstructionMapping(
180  3, 1, getOperandsMapping(
181  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
182  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
183  2); // Num Operands
184  AltMappings.push_back(&VSMapping);
185 
186  return AltMappings;
187 
188  }
// Compares: result is SCC when both inputs are scalar, VCC otherwise.
189  case TargetOpcode::G_ICMP: {
190  unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
191  const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
192  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
193  nullptr, // Predicate operand.
194  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
195  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
196  4); // Num Operands
197  AltMappings.push_back(&SSMapping);
198 
199  const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
200  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
201  nullptr, // Predicate operand.
202  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
203  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
204  4); // Num Operands
205  AltMappings.push_back(&SVMapping);
206 
207  const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
208  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
209  nullptr, // Predicate operand.
210  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
211  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
212  4); // Num Operands
213  AltMappings.push_back(&VSMapping);
214 
215  const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
216  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
217  nullptr, // Predicate operand.
218  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
219  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
220  4); // Num Operands
221  AltMappings.push_back(&VVMapping);
222 
223  return AltMappings;
224  }
// Select: all-scalar (SCC condition) or all-vector (VCC condition), with
// the 64-bit VGPR case using the SGPR64Only breakdown.
225  case TargetOpcode::G_SELECT: {
226  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
227  const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
228  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
229  AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
230  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
231  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
232  4); // Num Operands
233  AltMappings.push_back(&SSMapping);
234 
235  const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
236  getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
237  AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
238  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
239  AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
240  4); // Num Operands
241  AltMappings.push_back(&VVMapping);
242 
243  return AltMappings;
244  }
// Carry-in/carry-out arithmetic: carries live in SCC (scalar) or VCC
// (vector). NOTE(review): original line 251, the getOperandsMapping( call
// for SSMapping, is missing from this listing.
245  case TargetOpcode::G_UADDE:
246  case TargetOpcode::G_USUBE:
247  case TargetOpcode::G_SADDE:
248  case TargetOpcode::G_SSUBE: {
249  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
250  const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
252  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
253  AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
254  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
255  AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
256  AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
257  5); // Num Operands
258  AltMappings.push_back(&SSMapping);
259 
260  const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
261  getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
262  AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
263  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
264  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
265  AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
266  5); // Num Operands
267  AltMappings.push_back(&VVMapping);
268  return AltMappings;
269  }
// Conditional branch: the 1-bit condition may live in SCC or VCC.
270  case AMDGPU::G_BRCOND: {
271  assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
272 
273  const InstructionMapping &SMapping = getInstructionMapping(
274  1, 1, getOperandsMapping(
275  {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
276  2); // Num Operands
277  AltMappings.push_back(&SMapping);
278 
279  const InstructionMapping &VMapping = getInstructionMapping(
280  1, 1, getOperandsMapping(
281  {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
282  2); // Num Operands
283  AltMappings.push_back(&VMapping);
284  return AltMappings;
285  }
286  default:
287  break;
288  }
// NOTE(review): the return statement (original line 289) is missing here.
290 }
291 
292 void AMDGPURegisterBankInfo::split64BitValueForMapping(
// NOTE(review): parameter lines (original 293-294) are missing from this
// listing; the body uses a MachineIRBuilder `B` and an output register
// vector `Regs` that are presumably declared there — confirm upstream.
295  LLT HalfTy,
296  unsigned Reg) const {
297  assert(HalfTy.getSizeInBits() == 32);
// Create the two 32-bit halves in the same register bank as the source.
299  unsigned LoLHS = MRI->createGenericVirtualRegister(HalfTy);
300  unsigned HiLHS = MRI->createGenericVirtualRegister(HalfTy);
301  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
302  MRI->setRegBank(LoLHS, *Bank);
303  MRI->setRegBank(HiLHS, *Bank);
304 
// Hand the halves back to the caller, low half first.
305  Regs.push_back(LoLHS);
306  Regs.push_back(HiLHS);
307 
// Emit the actual split: Lo, Hi = G_UNMERGE_VALUES Reg.
308  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
309  .addDef(LoLHS)
310  .addDef(HiLHS)
311  .addUse(Reg);
312 }
313 
314 /// Replace the current type each register in \p Regs has with \p NewTy
// NOTE(review): the signature start (original line 315) is missing from
// this listing. The assert requires each register's existing bit width to
// already equal NewTy's — this only retypes, it never resizes.
316  LLT NewTy) {
317  for (unsigned Reg : Regs) {
318  assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
319  MRI.setType(Reg, NewTy);
320  }
321 }
322 
// NOTE(review): getHalfSizedType body — the signature line (original 323)
// is missing from this listing. Produces a type half the width of `Ty`.
324  if (Ty.isVector()) {
// Vectors are halved by element count; scalarOrVector collapses a 1-element
// result to a plain scalar.
325  assert(Ty.getNumElements() % 2 == 0);
326  return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
327  }
328 
// Scalars are halved by bit width (must be evenly divisible).
329  assert(Ty.getSizeInBits() % 2 == 0);
330  return LLT::scalar(Ty.getSizeInBits() / 2);
331 }
332 
333 /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
334 /// any of the required SGPR operands are VGPRs, perform a waterfall loop to
335 /// execute the instruction for each unique combination of values in all lanes
336 /// in the wave. The block will be split such that new blocks
// (review: the doc sentence above is truncated in this listing — it
// presumably continues "...are created for the loop body and remainder".)
337 void AMDGPURegisterBankInfo::executeInWaterfallLoop(
// NOTE(review): original line 338 (remaining parameters — the body uses
// `MI`, `MRI` and an iterator `I` that are not declared in the visible
// lines) and original lines 343 and 399 are missing from this listing; in
// particular `LoopBB` is used below without a visible definition
// (original line 399 presumably creates it). Confirm against upstream.
339  ArrayRef<unsigned> OpIndices) const {
340  MachineFunction *MF = MI.getParent()->getParent();
341  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
342  const SIInstrInfo *TII = ST.getInstrInfo();
344 
345  MachineBasicBlock &MBB = *MI.getParent();
346  const DebugLoc &DL = MI.getDebugLoc();
347 
348  assert(OpIndices.size() == 1 &&
349  "need to implement support for multiple operands");
350 
351  // Use a set to avoid extra readfirstlanes in the case where multiple operands
352  // are the same register.
353  SmallSet<unsigned, 4> SGPROperandRegs;
354  for (unsigned Op : OpIndices) {
355  assert(MI.getOperand(Op).isUse());
356  unsigned Reg = MI.getOperand(Op).getReg();
357  const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
// Only operands currently assigned to the VGPR bank need the waterfall.
358  if (OpBank->getID() == AMDGPU::VGPRRegBankID)
359  SGPROperandRegs.insert(Reg);
360  }
361 
362  // No operands need to be replaced, so no need to loop.
363  if (SGPROperandRegs.empty())
364  return;
365 
// For each def, build: an IMPLICIT_DEF initial value entering the loop and
// a PHI register carrying the per-iteration value, both in the def's bank.
366  MachineIRBuilder B(MI);
367  SmallVector<unsigned, 4> ResultRegs;
368  SmallVector<unsigned, 4> InitResultRegs;
369  SmallVector<unsigned, 4> PhiRegs;
370  for (MachineOperand &Def : MI.defs()) {
371  LLT ResTy = MRI.getType(Def.getReg());
372  const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
373  ResultRegs.push_back(Def.getReg());
374  unsigned InitReg = B.buildUndef(ResTy).getReg(0);
375  unsigned PhiReg = MRI.createGenericVirtualRegister(ResTy);
376  InitResultRegs.push_back(InitReg);
377  PhiRegs.push_back(PhiReg);
378  MRI.setRegBank(PhiReg, *DefBank);
379  MRI.setRegBank(InitReg, *DefBank);
380  }
381 
382  unsigned SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
383  unsigned InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
384 
385  // Don't bother using generic instructions/registers for the exec mask.
386  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
387  .addDef(InitSaveExecReg)
388 
389  // Save the EXEC mask
390  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
391  .addReg(AMDGPU::EXEC);
392 
393  unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
394  unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
395  unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
396 
397  // To insert the loop we need to split the block. Move everything before this
398  // point to a new block, and insert a new empty block before this instruction.
400  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
401  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
402  MachineFunction::iterator MBBI(MBB);
403  ++MBBI;
404  MF->insert(MBBI, LoopBB);
405  MF->insert(MBBI, RestoreExecBB);
406  MF->insert(MBBI, RemainderBB);
407 
// Loop either exits to the exec-restore block or repeats itself.
408  LoopBB->addSuccessor(RestoreExecBB);
409  LoopBB->addSuccessor(LoopBB);
410 
411  // Move the rest of the block into a new block.
412  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
413  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
414 
415  MBB.addSuccessor(LoopBB);
416  RestoreExecBB->addSuccessor(RemainderBB);
417 
418  B.setInsertPt(*LoopBB, LoopBB->end());
419 
// Carry the remaining-lanes exec mask around the loop.
420  B.buildInstr(TargetOpcode::PHI)
421  .addDef(PhiExec)
422  .addReg(InitSaveExecReg)
423  .addMBB(&MBB)
424  .addReg(NewExec)
425  .addMBB(LoopBB);
426 
// Carry each (partial) result value around the loop as well.
427  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
428  B.buildInstr(TargetOpcode::G_PHI)
429  .addDef(std::get<2>(Result))
430  .addReg(std::get<0>(Result)) // Initial value / implicit_def
431  .addMBB(&MBB)
432  .addReg(std::get<1>(Result)) // Mid-loop value.
433  .addMBB(LoopBB);
434  }
435 
436  // Move the instruction into the loop.
437  LoopBB->splice(LoopBB->end(), &MBB, I);
438  I = std::prev(LoopBB->end());
439 
// Rewrite each offending VGPR use: readfirstlane the current lane's value
// into an SGPR, compare it against the full VGPR, and substitute it.
440  for (MachineOperand &Op : MI.uses()) {
441  if (!Op.isReg())
442  continue;
443 
444  assert(!Op.isDef());
445  if (SGPROperandRegs.count(Op.getReg())) {
446  unsigned CurrentLaneOpReg
447  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
448  MRI.setType(CurrentLaneOpReg, LLT::scalar(32)); // FIXME
449 
450  assert(MRI.getType(Op.getReg())== LLT::scalar(32) &&
451  "need to implement support for other types");
452 
453  constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
454 
455  // Read the next variant <- also loop target.
456  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
457  CurrentLaneOpReg)
458  .addReg(Op.getReg());
459 
460  // FIXME: Need to and each condition
461 
462  // Compare the just read SGPR value to all possible operand values.
463  B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
464  .addDef(CondReg)
465  .addReg(CurrentLaneOpReg)
466  .addReg(Op.getReg());
467  Op.setReg(CurrentLaneOpReg);
468  }
469  }
470 
471  // Update EXEC, save the original EXEC value to VCC.
472  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
473  .addDef(NewExec)
474  .addReg(CondReg, RegState::Kill);
475 
476  MRI.setSimpleHint(NewExec, CondReg);
477 
478  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
479  B.buildInstr(AMDGPU::S_XOR_B64_term)
480  .addDef(AMDGPU::EXEC)
481  .addReg(AMDGPU::EXEC)
482  .addReg(NewExec);
483 
484  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
485  // s_cbranch_scc0?
486 
487  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
488  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
489  .addMBB(LoopBB);
490 
491  // Restore the EXEC mask
492  B.buildInstr(AMDGPU::S_MOV_B64_term)
493  .addDef(AMDGPU::EXEC)
494  .addReg(SaveExecReg);
495 }
496 
// Apply a non-default operand mapping chosen by RegBankSelect. Handles the
// opcodes whose alternative mappings require rewriting the MIR (splitting
// 64-bit ops into 32-bit halves, or wrapping in a waterfall loop);
// everything else falls through to applyDefaultMapping.
497 void AMDGPURegisterBankInfo::applyMappingImpl(
498  const OperandsMapper &OpdMapper) const {
499  MachineInstr &MI = OpdMapper.getMI();
500  unsigned Opc = MI.getOpcode();
501  MachineRegisterInfo &MRI = OpdMapper.getMRI();
502  switch (Opc) {
// 64-bit G_SELECT mapped to VGPRs: rewrite as two 32-bit selects sharing
// the single 1-bit condition.
503  case AMDGPU::G_SELECT: {
504  unsigned DstReg = MI.getOperand(0).getReg();
505  LLT DstTy = MRI.getType(DstReg);
506  if (DstTy.getSizeInBits() != 64)
507  break;
508 
509  LLT HalfTy = getHalfSizedType(DstTy);
510 
// getVRegs(i) returns the new registers the mapper created for operand i;
// empty means that operand keeps its original (unsplit) register.
511  SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
512  SmallVector<unsigned, 1> Src0Regs(OpdMapper.getVRegs(1));
513  SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));
514  SmallVector<unsigned, 2> Src2Regs(OpdMapper.getVRegs(3));
515 
516  // All inputs are SGPRs, nothing special to do.
517  if (DefRegs.empty()) {
518  assert(Src1Regs.empty() && Src2Regs.empty());
519  break;
520  }
521 
522  MachineIRBuilder B(MI);
// The condition is 1 bit, so it is never split — at most renamed.
523  if (Src0Regs.empty())
524  Src0Regs.push_back(MI.getOperand(1).getReg());
525  else {
526  assert(Src0Regs.size() == 1);
527  }
528 
// Split (or retype the already-split) 64-bit true/false values.
529  if (Src1Regs.empty())
530  split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
531  else {
532  setRegsToType(MRI, Src1Regs, HalfTy);
533  }
534 
535  if (Src2Regs.empty())
536  split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
537  else
538  setRegsToType(MRI, Src2Regs, HalfTy);
539 
540  setRegsToType(MRI, DefRegs, HalfTy);
541 
// Two half-width selects; [0] is the low half, [1] the high half.
542  B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
543  B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);
544 
545  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
546  MI.eraseFromParent();
547  return;
548  }
549  case AMDGPU::G_AND:
550  case AMDGPU::G_OR:
551  case AMDGPU::G_XOR: {
552  // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
553  // there is a VGPR input.
554  unsigned DstReg = MI.getOperand(0).getReg();
555  LLT DstTy = MRI.getType(DstReg);
556  if (DstTy.getSizeInBits() != 64)
557  break;
558 
559  LLT HalfTy = getHalfSizedType(DstTy);
560  SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
561  SmallVector<unsigned, 2> Src0Regs(OpdMapper.getVRegs(1));
562  SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));
563 
564  // All inputs are SGPRs, nothing special to do.
565  if (DefRegs.empty()) {
566  assert(Src0Regs.empty() && Src1Regs.empty());
567  break;
568  }
569 
570  assert(DefRegs.size() == 2);
571  assert(Src0Regs.size() == Src1Regs.size() &&
572  (Src0Regs.empty() || Src0Regs.size() == 2));
573 
574  // Depending on where the source registers came from, the generic code may
575  // have decided to split the inputs already or not. If not, we still need to
576  // extract the values.
577  MachineIRBuilder B(MI);
578 
579  if (Src0Regs.empty())
580  split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
581  else
582  setRegsToType(MRI, Src0Regs, HalfTy);
583 
584  if (Src1Regs.empty())
585  split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
586  else
587  setRegsToType(MRI, Src1Regs, HalfTy);
588 
589  setRegsToType(MRI, DefRegs, HalfTy);
590 
// Emit the same logical opcode once per 32-bit half.
591  B.buildInstr(Opc)
592  .addDef(DefRegs[0])
593  .addUse(Src0Regs[0])
594  .addUse(Src1Regs[0]);
595 
596  B.buildInstr(Opc)
597  .addDef(DefRegs[1])
598  .addUse(Src0Regs[1])
599  .addUse(Src1Regs[1]);
600 
601  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
602  MI.eraseFromParent();
603  return;
604  }
// Index operand (#2) must be scalar; waterfall if it is in a VGPR.
605  case AMDGPU::G_EXTRACT_VECTOR_ELT:
606  applyDefaultMapping(OpdMapper);
607  executeInWaterfallLoop(MI, MRI, { 2 });
608  return;
609  default:
610  break;
611  }
612 
613  return applyDefaultMapping(OpdMapper);
614 }
615 
616 static bool isInstrUniform(const MachineInstr &MI) {
617  if (!MI.hasOneMemOperand())
618  return false;
619 
620  const MachineMemOperand *MMO = *MI.memoperands_begin();
621  return AMDGPUInstrInfo::isUniformMMO(MMO);
622 }
623 
624 bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
625  const MachineFunction &MF = *MI.getParent()->getParent();
626  const MachineRegisterInfo &MRI = MF.getRegInfo();
627  for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
628  if (!MI.getOperand(i).isReg())
629  continue;
630  unsigned Reg = MI.getOperand(i).getReg();
631  if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
632  if (Bank->getID() == AMDGPU::VGPRRegBankID)
633  return false;
634 
635  assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
636  Bank->getID() == AMDGPU::SCCRegBankID);
637  }
638  }
639  return true;
640 }
641 
// NOTE(review): getDefaultMappingSOP — all-scalar mapping: every operand
// goes to SGPR, except 1-bit values which go to SCC. This listing is
// missing the return-type line (original 642) and the declaration of
// OpdsMapping (original 646).
643 AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
644  const MachineFunction &MF = *MI.getParent()->getParent();
645  const MachineRegisterInfo &MRI = MF.getRegInfo();
647 
648  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
649  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
// 1-bit values live in SCC on the scalar side.
650  unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
651  OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
652  }
653  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
654  MI.getNumOperands());
655 }
656 
// NOTE(review): getDefaultMappingVOP — all-vector mapping: result in VGPR,
// 1-bit operands in VCC, the rest in VGPR; the first non-result operand may
// keep an existing bank. This listing is missing the return-type line
// (original 657) and the OpdsMapping declaration (original 661).
658 AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
659  const MachineFunction &MF = *MI.getParent()->getParent();
660  const MachineRegisterInfo &MRI = MF.getRegInfo();
662  unsigned OpdIdx = 0;
663 
// Operand 0 (the result) always maps to VGPR.
664  unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
665  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
666 
// Intrinsic calls carry an ID operand with no register mapping.
667  if (MI.getOperand(OpdIdx).isIntrinsicID())
668  OpdsMapping[OpdIdx++] = nullptr;
669 
// First source operand: keep its current bank if it has one, otherwise
// default to VCC for 1-bit values and VGPR for everything else.
670  unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
671  unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
672 
673  unsigned DefaultBankID = Size1 == 1 ?
674  AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
675  unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
676 
677  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
678 
// All remaining operands are forced to VCC (1-bit) or VGPR.
679  for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
680  unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI);
681  unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
682  OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
683  }
684 
685  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
686  MI.getNumOperands());
687 }
688 
// NOTE(review): getDefaultMappingAllVGPR — maps every operand to the VGPR
// bank unconditionally (even 1-bit values, unlike getDefaultMappingVOP).
// This listing is missing the return-type line (original 689) and the
// OpdsMapping declaration (original 693).
690 AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
691  const MachineFunction &MF = *MI.getParent()->getParent();
692  const MachineRegisterInfo &MRI = MF.getRegInfo();
694 
695  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
696  unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
697  OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
698  }
699 
700  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
701  MI.getNumOperands());
702 }
703 
// NOTE(review): getInstrMappingForLoad — picks scalar (SMRD-style) banks
// for uniform loads and VGPR banks otherwise. This listing is missing the
// return-type line (original 704), the OpdsMapping declaration (original
// 709), and the start of the getInstructionMapping call (original 728).
705 AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
706 
707  const MachineFunction &MF = *MI.getParent()->getParent();
708  const MachineRegisterInfo &MRI = MF.getRegInfo();
710  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
711  unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
712 
713  const ValueMapping *ValMapping;
714  const ValueMapping *PtrMapping;
715 
716  if (isInstrUniform(MI)) {
717  // We have a uniform instruction so we want to use an SMRD load
718  ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
719  PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
720  } else {
721  ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
722  // FIXME: What would happen if we used SGPRRegBankID here?
723  PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
724  }
725 
726  OpdsMapping[0] = ValMapping;
727  OpdsMapping[1] = PtrMapping;
729  1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
730  return Mapping;
731 
732  // FIXME: Do we want to add a mapping for FLAT load, or should we just
733  // handle that during instruction selection?
734 }
735 
736 unsigned
737 AMDGPURegisterBankInfo::getRegBankID(unsigned Reg,
738  const MachineRegisterInfo &MRI,
739  const TargetRegisterInfo &TRI,
740  unsigned Default) const {
741 
742  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
743  return Bank ? Bank->getID() : Default;
744 }
745 
746 ///
747 /// This function must return a legal mapping, because
748 /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
749 /// in RegBankSelect::Mode::Fast. Any mapping that would cause a
750 /// VGPR to SGPR generated is illegal.
751 ///
755 
756  if (Mapping.isValid())
757  return Mapping;
758 
759  const MachineFunction &MF = *MI.getParent()->getParent();
760  const MachineRegisterInfo &MRI = MF.getRegInfo();
762 
763  switch (MI.getOpcode()) {
764  default:
766 
767  case AMDGPU::G_AND:
768  case AMDGPU::G_OR:
769  case AMDGPU::G_XOR: {
770  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
771  if (Size == 1) {
772  OpdsMapping[0] = OpdsMapping[1] =
773  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
774  break;
775  }
776 
777  if (Size == 64) {
778 
779  if (isSALUMapping(MI)) {
780  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
781  OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
782  } else {
783  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
784  unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
785  OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
786 
787  unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
788  OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
789  }
790 
791  break;
792  }
793 
795  }
796 
797  case AMDGPU::G_GEP:
798  case AMDGPU::G_ADD:
799  case AMDGPU::G_SUB:
800  case AMDGPU::G_MUL:
801  case AMDGPU::G_SHL:
802  case AMDGPU::G_LSHR:
803  case AMDGPU::G_ASHR:
804  case AMDGPU::G_UADDO:
805  case AMDGPU::G_SADDO:
806  case AMDGPU::G_USUBO:
807  case AMDGPU::G_SSUBO:
808  case AMDGPU::G_UADDE:
809  case AMDGPU::G_SADDE:
810  case AMDGPU::G_USUBE:
811  case AMDGPU::G_SSUBE:
812  case AMDGPU::G_UMULH:
813  case AMDGPU::G_SMULH:
814  if (isSALUMapping(MI))
815  return getDefaultMappingSOP(MI);
817 
818  case AMDGPU::G_FADD:
819  case AMDGPU::G_FSUB:
820  case AMDGPU::G_FPTOSI:
821  case AMDGPU::G_FPTOUI:
822  case AMDGPU::G_FMUL:
823  case AMDGPU::G_FMA:
824  case AMDGPU::G_FSQRT:
825  case AMDGPU::G_SITOFP:
826  case AMDGPU::G_UITOFP:
827  case AMDGPU::G_FPTRUNC:
828  case AMDGPU::G_FPEXT:
829  case AMDGPU::G_FEXP2:
830  case AMDGPU::G_FLOG2:
831  case AMDGPU::G_INTRINSIC_TRUNC:
832  case AMDGPU::G_INTRINSIC_ROUND:
833  return getDefaultMappingVOP(MI);
834  case AMDGPU::G_IMPLICIT_DEF: {
835  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
836  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
837  break;
838  }
839  case AMDGPU::G_FCONSTANT:
840  case AMDGPU::G_CONSTANT:
841  case AMDGPU::G_FRAME_INDEX:
842  case AMDGPU::G_BLOCK_ADDR: {
843  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
844  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
845  break;
846  }
847  case AMDGPU::G_INSERT: {
848  unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
849  AMDGPU::VGPRRegBankID;
850  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
851  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
852  unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
853  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
854  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
855  OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
856  OpdsMapping[3] = nullptr;
857  break;
858  }
859  case AMDGPU::G_EXTRACT: {
860  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
861  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
862  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
863  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
864  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
865  OpdsMapping[2] = nullptr;
866  break;
867  }
868  case AMDGPU::G_MERGE_VALUES: {
869  unsigned Bank = isSALUMapping(MI) ?
870  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
871  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
872  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
873 
874  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
875  // Op1 and Dst should use the same register bank.
876  for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
877  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
878  break;
879  }
880  case AMDGPU::G_BITCAST:
881  case AMDGPU::G_INTTOPTR:
882  case AMDGPU::G_PTRTOINT:
883  case AMDGPU::G_CTLZ:
884  case AMDGPU::G_CTLZ_ZERO_UNDEF:
885  case AMDGPU::G_CTTZ:
886  case AMDGPU::G_CTTZ_ZERO_UNDEF:
887  case AMDGPU::G_CTPOP:
888  case AMDGPU::G_BSWAP:
889  case AMDGPU::G_FABS:
890  case AMDGPU::G_FNEG: {
891  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
892  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
893  OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
894  break;
895  }
896  case AMDGPU::G_TRUNC: {
897  unsigned Dst = MI.getOperand(0).getReg();
898  unsigned Src = MI.getOperand(1).getReg();
899  unsigned Bank = getRegBankID(Src, MRI, *TRI);
900  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
901  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
902  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
903  OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
904  break;
905  }
906  case AMDGPU::G_ZEXT:
907  case AMDGPU::G_SEXT:
908  case AMDGPU::G_ANYEXT: {
909  unsigned Dst = MI.getOperand(0).getReg();
910  unsigned Src = MI.getOperand(1).getReg();
911  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
912  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
913  unsigned SrcBank = getRegBankID(Src, MRI, *TRI,
914  SrcSize == 1 ? AMDGPU::SGPRRegBankID :
915  AMDGPU::VGPRRegBankID);
916  unsigned DstBank = SrcBank;
917  if (SrcSize == 1) {
918  if (SrcBank == AMDGPU::SGPRRegBankID)
919  DstBank = AMDGPU::VGPRRegBankID;
920  else
921  DstBank = AMDGPU::SGPRRegBankID;
922  }
923 
924  OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
925  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank, SrcSize);
926  break;
927  }
928  case AMDGPU::G_FCMP: {
929  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
930  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
931  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
932  OpdsMapping[1] = nullptr; // Predicate Operand.
933  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
934  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
935  break;
936  }
937  case AMDGPU::G_STORE: {
938  assert(MI.getOperand(0).isReg());
939  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
940  // FIXME: We need to specify a different reg bank once scalar stores
941  // are supported.
942  const ValueMapping *ValMapping =
943  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
944  // FIXME: Depending on the type of store, the pointer could be in
945  // the SGPR Reg bank.
946  // FIXME: Pointer size should be based on the address space.
947  const ValueMapping *PtrMapping =
948  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
949 
950  OpdsMapping[0] = ValMapping;
951  OpdsMapping[1] = PtrMapping;
952  break;
953  }
954 
955  case AMDGPU::G_ICMP: {
956  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
957  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
958  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
959  unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
960  Op3Bank == AMDGPU::SGPRRegBankID ?
961  AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
962  OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
963  OpdsMapping[1] = nullptr; // Predicate Operand.
964  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
965  OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
966  break;
967  }
968 
969 
970  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
971  unsigned OutputBankID = isSALUMapping(MI) ?
972  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
973  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
974  unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
975  unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
976 
977  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
978  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
979 
980  // The index can be either if the source vector is VGPR.
981  OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
982  break;
983  }
984  case AMDGPU::G_INSERT_VECTOR_ELT: {
985  unsigned OutputBankID = isSALUMapping(MI) ?
986  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
987 
988  unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
989  unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
990  unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
991  unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
992  unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
993 
994  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
995  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
996  OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
997 
998  // The index can be either if the source vector is VGPR.
999  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1000  break;
1001  }
1002  case AMDGPU::G_UNMERGE_VALUES: {
1003  unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
1004  AMDGPU::VGPRRegBankID;
1005 
1006  // Op1 and Dst should use the same register bank.
1007  // FIXME: Shouldn't this be the default? Why do we need to handle this?
1008  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1009  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
1010  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
1011  }
1012  break;
1013  }
1014  case AMDGPU::G_INTRINSIC: {
1015  switch (MI.getOperand(1).getIntrinsicID()) {
1016  default:
1018  case Intrinsic::maxnum:
1019  case Intrinsic::minnum:
1020  case Intrinsic::amdgcn_cvt_pkrtz:
1021  return getDefaultMappingVOP(MI);
1022  case Intrinsic::amdgcn_kernarg_segment_ptr: {
1023  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1024  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1025  break;
1026  }
1027  case Intrinsic::amdgcn_wqm_vote: {
1028  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1029  OpdsMapping[0] = OpdsMapping[2]
1030  = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1031  break;
1032  }
1033  }
1034  break;
1035  }
1036  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
1037  switch (MI.getOperand(0).getIntrinsicID()) {
1038  default:
1040  case Intrinsic::amdgcn_exp_compr:
1041  OpdsMapping[0] = nullptr; // IntrinsicID
1042  // FIXME: These are immediate values which can't be read from registers.
1043  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1044  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1045  // FIXME: Could we support packed types here?
1046  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1047  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1048  // FIXME: These are immediate values which can't be read from registers.
1049  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1050  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1051  break;
1052  case Intrinsic::amdgcn_exp:
1053  OpdsMapping[0] = nullptr; // IntrinsicID
1054  // FIXME: These are immediate values which can't be read from registers.
1055  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1056  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1057  // FIXME: Could we support packed types here?
1058  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1059  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1060  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1061  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
1062  // FIXME: These are immediate values which can't be read from registers.
1063  OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1064  OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
1065  break;
1066  }
1067  break;
1068  }
1069  case AMDGPU::G_SELECT: {
1070  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1071  unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1072  AMDGPU::SGPRRegBankID);
1073  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1074  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1075  bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
1076  Op2Bank == AMDGPU::SGPRRegBankID &&
1077  Op3Bank == AMDGPU::SGPRRegBankID;
1078  unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1079  Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
1080 
1081  if (Size == 64) {
1082  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
1083  OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
1084  OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
1085  OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
1086  } else {
1087  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
1088  OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
1089  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
1090  OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
1091  }
1092 
1093  break;
1094  }
1095 
1096  case AMDGPU::G_LOAD:
1097  return getInstrMappingForLoad(MI);
1098 
1099  case AMDGPU::G_ATOMICRMW_XCHG:
1100  case AMDGPU::G_ATOMICRMW_ADD:
1101  case AMDGPU::G_ATOMICRMW_SUB:
1102  case AMDGPU::G_ATOMICRMW_AND:
1103  case AMDGPU::G_ATOMICRMW_OR:
1104  case AMDGPU::G_ATOMICRMW_XOR:
1105  case AMDGPU::G_ATOMICRMW_MAX:
1106  case AMDGPU::G_ATOMICRMW_MIN:
1107  case AMDGPU::G_ATOMICRMW_UMAX:
1108  case AMDGPU::G_ATOMICRMW_UMIN:
1109  case AMDGPU::G_ATOMIC_CMPXCHG: {
1110  return getDefaultMappingAllVGPR(MI);
1111  }
1112  case AMDGPU::G_BRCOND: {
1113  unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
1114  AMDGPU::SGPRRegBankID);
1115  assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
1116  if (Bank != AMDGPU::SCCRegBankID)
1117  Bank = AMDGPU::VCCRegBankID;
1118 
1119  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
1120  break;
1121  }
1122  }
1123 
1124  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
1125  MI.getNumOperands());
1126 }
1127 
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
Interface definition for SIRegisterInfo.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:491
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
Helper class that represents how the value of an instruction may be mapped and what is the related co...
void push_back(const T &Elt)
Definition: SmallVector.h:211
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
unsigned getReg() const
getReg - Returns the register number.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
void setRegBank(unsigned Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
bool isIntrinsicID() const
bool isVector() const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
iterator_range< SmallVectorImpl< unsigned >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
A description of a memory reference used in the backend.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:411
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
bool isSGPRClass(const TargetRegisterClass *RC) const
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const RegisterBank * RegBank
Register bank where the partial value lives.
static bool isInstrUniform(const MachineInstr &MI)
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< unsigned > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
MachineRegisterInfo * getMRI()
Getter for MRI.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&... args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:660
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Helper class to build MachineInstr.
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned Length
Length of this mapping in bits.
void setType(unsigned VReg, LLT Ty)
Set the low-level type of VReg to Ty.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:548
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:480
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1237
bool isValid() const
Check whether this object is valid.
unsigned createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
static LLT getHalfSizedType(LLT Ty)
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
Intrinsic::ID getIntrinsicID() const
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Helper struct that represents how a value is mapped through different register banks.
This file declares the MachineIRBuilder class.
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override
Get a register bank that covers RC.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
#define I(x, y, z)
Definition: MD5.cpp:58
static const TargetRegisterClass * constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
uint32_t Size
Definition: Profile.cpp:46
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void setSimpleHint(unsigned VReg, unsigned PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
This class provides the information for the target register banks.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr & getMI() const
Helper struct that represents how a value is mapped through different register banks.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
aarch64 promote const
unsigned NumBreakDowns
Number of partial mapping to break down this value.
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
unsigned getReg(unsigned Idx)
Get the register for the operand index.
IRTranslator LLVM IR MI
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1226
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47