//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"

#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AMDGPUGenRegisterBankInfo.def"

using namespace llvm;

AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
    : AMDGPUGenRegisterBankInfo(),
      TRI(static_cast<const SIRegisterInfo*>(&TRI)) {

  // HACK: Until this is fully tablegen'd.
  static bool AlreadyInit = false;
  if (AlreadyInit)
    return;

  AlreadyInit = true;

  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
  (void)RBSGPR;
  assert(&RBSGPR == &AMDGPU::SGPRRegBank);

  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
  (void)RBVGPR;
  assert(&RBVGPR == &AMDGPU::VGPRRegBank);
}

unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                          const RegisterBank &Src,
                                          unsigned Size) const {
  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      Src.getID() == AMDGPU::VGPRRegBankID) {
    return std::numeric_limits<unsigned>::max();
  }

  // An SGPRRegBank value with size 1 is actually VCC, or another 64-bit SGPR
  // written by the VALU.
  if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
      (Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  if (Dst.getID() == AMDGPU::SCCRegBankID &&
      Src.getID() == AMDGPU::VCCRegBankID)
    return std::numeric_limits<unsigned>::max();

  return RegisterBankInfo::copyCost(Dst, Src, Size);
}

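// Note on copyCost above (added commentary, not from the original source):
// RegBankSelect uses this cost when weighing alternative mappings, so
// returning std::numeric_limits<unsigned>::max() effectively forbids any
// mapping that needs a divergent VGPR value copied into an SGPR, e.g.:
//
//   %1:sgpr(s32) = COPY %0:vgpr(s32)  ; priced at UINT_MAX, never chosen
//   %1:vgpr(s32) = COPY %0:sgpr(s32)  ; fine, falls through to default cost
//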
unsigned AMDGPURegisterBankInfo::getBreakDownCost(
  const ValueMapping &ValMapping,
  const RegisterBank *CurBank) const {
  assert(ValMapping.NumBreakDowns == 2 &&
         ValMapping.BreakDown[0].Length == 32 &&
         ValMapping.BreakDown[0].StartIdx == 0 &&
         ValMapping.BreakDown[1].Length == 32 &&
         ValMapping.BreakDown[1].StartIdx == 32 &&
         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);

  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
  // want.

  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
  // alignment restrictions, but this probably isn't important.
  return 1;
}

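// Illustration for getBreakDownCost above (added, not from the original
// source): the only decomposition it accepts is a 64-bit value split into two
// adjacent 32-bit pieces in the same bank:
//
//   BreakDown[0] = { StartIdx = 0,  Length = 32, RegBank = B }
//   BreakDown[1] = { StartIdx = 32, Length = 32, RegBank = B }
//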
const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
    const TargetRegisterClass &RC) const {

  if (TRI->isSGPRClass(&RC))
    return getRegBank(AMDGPU::SGPRRegBankID);

  return getRegBank(AMDGPU::VGPRRegBankID);
}

template <unsigned NumOps>
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::addMappingFromTable(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const std::array<unsigned, NumOps> RegSrcOpIdx,
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {

  InstructionMappings AltMappings;

  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());

  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    unsigned Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
  }

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
  }

  unsigned MappingID = 0;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
    }

    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
                                                 getOperandsMapping(Operands),
                                                 Operands.size()));
  }

  return AltMappings;
}

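// Example for addMappingFromTable above (added commentary, not from the
// original source): with RegSrcOpIdx = {2, 3}, a table row such as
// { { SGPRRegBankID, VGPRRegBankID }, 300 } expands into one
// InstructionMapping that puts operand 2 in an SGPR and operand 3 in a VGPR,
// carrying cost 300 for RegBankSelect to compare against the other rows.
//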
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {

  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_buffer_load: {
    static const OpRegBankEntry<3> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Waterfall loop needed for rsrc. In the worst case this will execute
      // approximately an extra 10 * wavesize + 2 instructions.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
    };

    // rsrc, voffset, offset
    const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_buffer_load: {
    static const OpRegBankEntry<2> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Only need 1 register in the loop.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

      // Have to waterfall the resource.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

      // Have to waterfall the resource, and the offset.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
    };

    // rsrc, offset
    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  InstructionMappings AltMappings;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    if (Size != 64)
      break;

    const InstructionMapping &SSMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 2, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VVMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(
      3, 3, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SVMapping);

    // SGPR in LHS is slightly preferable, so make VS more expensive than SV.
    const InstructionMapping &VSMapping = getInstructionMapping(
      3, 4, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VSMapping);
    break;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    // FIXME: Should we be hard coding the size for these mappings?
    const InstructionMapping &SSMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.push_back(&VVMapping);

    // FIXME: Should this be the pointer size (64 bits) or the size of the
    // register that will hold the buffer resource (128 bits)?
    const InstructionMapping &VSMapping = getInstructionMapping(
      3, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.push_back(&VSMapping);

    return AltMappings;
  }
  case TargetOpcode::G_ICMP: {
    unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SVMapping);

    const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SELECT: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&VVMapping);
    return AltMappings;
  }
  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

    const InstructionMapping &SMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&SMapping);

    const InstructionMapping &VMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&VMapping);
    return AltMappings;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AMDGPURegisterBankInfo::split64BitValueForMapping(
  MachineIRBuilder &B,
  SmallVector<unsigned, 2> &Regs,
  LLT HalfTy,
  unsigned Reg) const {
  assert(HalfTy.getSizeInBits() == 32);
  MachineRegisterInfo *MRI = B.getMRI();
  unsigned LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  unsigned HiLHS = MRI->createGenericVirtualRegister(HalfTy);
  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);

  Regs.push_back(LoLHS);
  Regs.push_back(HiLHS);

  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
    .addDef(LoLHS)
    .addDef(HiLHS)
    .addUse(Reg);
}

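// Illustration for split64BitValueForMapping above (added, not from the
// original source): for a 64-bit register %x it emits
//
//   %lo:bank(s32), %hi:bank(s32) = G_UNMERGE_VALUES %x:bank(s64)
//
// and appends %lo and %hi to Regs so the caller can rewrite the 64-bit
// operation as two 32-bit halves.
//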
/// Replace the current type each register in \p Regs has with \p NewTy
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<unsigned> Regs,
                          LLT NewTy) {
  for (unsigned Reg : Regs) {
    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
    MRI.setType(Reg, NewTy);
  }
}

static LLT getHalfSizedType(LLT Ty) {
  if (Ty.isVector()) {
    assert(Ty.getNumElements() % 2 == 0);
    return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
  }

  assert(Ty.getSizeInBits() % 2 == 0);
  return LLT::scalar(Ty.getSizeInBits() / 2);
}

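// Examples for getHalfSizedType above (added, not from the original source):
// s64 -> s32, <4 x s32> -> <2 x s32>, <2 x s64> -> s64. An odd element count
// or odd bit width trips the asserts.
//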
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
/// execute the instruction for each unique combination of values in all lanes
/// in the wave. The block will be split such that the rest of the instructions
/// are moved to a new block.
///
/// Essentially performs this loop:
///
/// Save Execution Mask
/// For (Lane : Wavefront) {
///   Enable Lane, Disable all other lanes
///   SGPR = read SGPR value for current lane from VGPR
///   VGPRResult[Lane] = use_op SGPR
/// }
/// Restore Execution Mask
///
/// There is additional complexity: the operand values are compared so that
/// all lanes sharing the same value are handled in a single iteration.
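///
/// Sketch of the control flow this creates (added commentary, not from the
/// original source):
///
///   MBB ---> LoopBB ---> RestoreExecBB ---> RemainderBB
///             ^  |
///             +--+   (s_cbranch_execnz while lanes remain)
///
/// MBB saves EXEC, LoopBB does the readfirstlane/compare/and-saveexec dance
/// and executes \p MI for the matching lanes, and RestoreExecBB restores the
/// saved mask before falling through to the rest of the original block.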
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  ArrayRef<unsigned> OpIndices) const {
  MachineFunction *MF = MI.getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock::iterator I(MI);

  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Use a set to avoid extra readfirstlanes in the case where multiple
  // operands are the same register.
  SmallSet<unsigned, 4> SGPROperandRegs;
  for (unsigned Op : OpIndices) {
    assert(MI.getOperand(Op).isUse());
    unsigned Reg = MI.getOperand(Op).getReg();
    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
    if (OpBank->getID() == AMDGPU::VGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  if (SGPROperandRegs.empty())
    return;

  MachineIRBuilder B(MI);
  SmallVector<unsigned, 4> ResultRegs;
  SmallVector<unsigned, 4> InitResultRegs;
  SmallVector<unsigned, 4> PhiRegs;
  for (MachineOperand &Def : MI.defs()) {
    LLT ResTy = MRI.getType(Def.getReg());
    const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
    ResultRegs.push_back(Def.getReg());
    unsigned InitReg = B.buildUndef(ResTy).getReg(0);
    unsigned PhiReg = MRI.createGenericVirtualRegister(ResTy);
    InitResultRegs.push_back(InitReg);
    PhiRegs.push_back(PhiReg);
    MRI.setRegBank(PhiReg, *DefBank);
    MRI.setRegBank(InitReg, *DefBank);
  }

  unsigned SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  unsigned InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  // To insert the loop we need to split the block. Move everything before this
  // point to a new block, and insert a new empty block before this instruction.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, RestoreExecBB);
  MF->insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(RestoreExecBB);
  LoopBB->addSuccessor(LoopBB);

  // Move the rest of the block into a new block.
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());

  MBB.addSuccessor(LoopBB);
  RestoreExecBB->addSuccessor(RemainderBB);

  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
    .addDef(PhiExec)
    .addReg(InitSaveExecReg)
    .addMBB(&MBB)
    .addReg(NewExec)
    .addMBB(LoopBB);

  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
    B.buildInstr(TargetOpcode::G_PHI)
      .addDef(std::get<2>(Result))
      .addReg(std::get<0>(Result)) // Initial value / implicit_def
      .addMBB(&MBB)
      .addReg(std::get<1>(Result)) // Mid-loop value.
      .addMBB(LoopBB);
  }

  // Move the instruction into the loop.
  LoopBB->splice(LoopBB->end(), &MBB, I);
  I = std::prev(LoopBB->end());

  B.setInstr(*I);

  unsigned CondReg = AMDGPU::NoRegister;

  for (MachineOperand &Op : MI.uses()) {
    if (!Op.isReg())
      continue;

    assert(!Op.isDef());
    if (SGPROperandRegs.count(Op.getReg())) {
      LLT OpTy = MRI.getType(Op.getReg());
      unsigned OpSize = OpTy.getSizeInBits();

      // Can only do a readlane of 32-bit pieces.
      if (OpSize == 32) {
        // Avoid extra copies in the simple case of one 32-bit register.
        unsigned CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        MRI.setType(CurrentLaneOpReg, OpTy);

        constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
        // Read the next variant <- also loop target.
        BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg)
          .addReg(Op.getReg());

        unsigned NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
        bool First = CondReg == AMDGPU::NoRegister;
        if (First)
          CondReg = NewCondReg;

        // Compare the value read from the first active lane against the value
        // in every lane.
        B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
          .addDef(NewCondReg)
          .addReg(CurrentLaneOpReg)
          .addReg(Op.getReg());
        Op.setReg(CurrentLaneOpReg);

        if (!First) {
          unsigned AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

          // If there are multiple operands to consider, AND the conditions
          // together.
          B.buildInstr(AMDGPU::S_AND_B64)
            .addDef(AndReg)
            .addReg(NewCondReg)
            .addReg(CondReg);
          CondReg = AndReg;
        }
      } else {
        LLT S32 = LLT::scalar(32);
        SmallVector<unsigned, 8> ReadlanePieces;

        // The compares can be done as 64-bit, but the extract needs to be done
        // in 32-bit pieces.

        bool Is64 = OpSize % 64 == 0;

        LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
        unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
          : AMDGPU::V_CMP_EQ_U32_e64;

        // Insert the unmerge before the loop.

        B.setMBB(MBB);
        auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
        B.setInstr(*I);

        unsigned NumPieces = Unmerge->getNumOperands() - 1;
        for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
          unsigned UnmergePiece = Unmerge.getReg(PieceIdx);

          unsigned CurrentLaneOpReg;
          if (Is64) {
            unsigned CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
            unsigned CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);

            MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
            MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
            MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegLo)
              .addReg(UnmergePiece, 0, AMDGPU::sub0);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegHi)
              .addReg(UnmergePiece, 0, AMDGPU::sub1);

            CurrentLaneOpReg =
              B.buildMerge(LLT::scalar(64),
                           {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
              .getReg(0);

            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);

            if (OpTy.getScalarSizeInBits() == 64) {
              // If we need to produce a 64-bit element vector, use the merged
              // pieces.
              ReadlanePieces.push_back(CurrentLaneOpReg);
            } else {
              // 32-bit element type.
              ReadlanePieces.push_back(CurrentLaneOpRegLo);
              ReadlanePieces.push_back(CurrentLaneOpRegHi);
            }
          } else {
            CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
            MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpReg)
              .addReg(UnmergePiece);
            ReadlanePieces.push_back(CurrentLaneOpReg);
          }

          unsigned NewCondReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
          bool First = CondReg == AMDGPU::NoRegister;
          if (First)
            CondReg = NewCondReg;

          B.buildInstr(CmpOp)
            .addDef(NewCondReg)
            .addReg(CurrentLaneOpReg)
            .addReg(UnmergePiece);

          if (!First) {
            unsigned AndReg
              = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

            // If there are multiple operands to consider, AND the conditions
            // together.
            B.buildInstr(AMDGPU::S_AND_B64)
              .addDef(AndReg)
              .addReg(NewCondReg)
              .addReg(CondReg);
            CondReg = AndReg;
          }
        }

        // FIXME: buildMerge seems to switch to CONCAT_VECTORS but not
        // BUILD_VECTOR.
        if (OpTy.isVector()) {
          auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        } else {
          auto Merge = B.buildMerge(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        }

        MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
      }
    }
  }

  B.setInsertPt(*LoopBB, LoopBB->end());

  // Update EXEC, saving the prior EXEC value to NewExec.
  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
    .addDef(NewExec)
    .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(AMDGPU::S_XOR_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
    .addReg(NewExec);

  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
  // s_cbranch_scc0?

  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
    .addMBB(LoopBB);

  // Save the EXEC mask before the loop.
  BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
    .addReg(AMDGPU::EXEC);

  // Restore the EXEC mask after the loop.
  B.setMBB(*RestoreExecBB);
  B.buildInstr(AMDGPU::S_MOV_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(SaveExecReg);
}

void AMDGPURegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  unsigned Opc = MI.getOpcode();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  switch (Opc) {
  case AMDGPU::G_SELECT: {
    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<unsigned, 1> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));
    SmallVector<unsigned, 2> Src2Regs(OpdMapper.getVRegs(3));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src1Regs.empty() && Src2Regs.empty());
      break;
    }

    MachineIRBuilder B(MI);
    if (Src0Regs.empty())
      Src0Regs.push_back(MI.getOperand(1).getReg());
    else {
      assert(Src0Regs.size() == 1);
    }

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else {
      setRegsToType(MRI, Src1Regs, HalfTy);
    }

    if (Src2Regs.empty())
      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
    else
      setRegsToType(MRI, Src2Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
    B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
    // there is a VGPR input.
    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);
    SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<unsigned, 2> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src0Regs.empty() && Src1Regs.empty());
      break;
    }

    assert(DefRegs.size() == 2);
    assert(Src0Regs.size() == Src1Regs.size() &&
           (Src0Regs.empty() || Src0Regs.size() == 2));

    // Depending on where the source registers came from, the generic code may
    // have decided to split the inputs already or not. If not, we still need
    // to extract the values.
    MachineIRBuilder B(MI);

    if (Src0Regs.empty())
      split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
    else
      setRegsToType(MRI, Src0Regs, HalfTy);

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else
      setRegsToType(MRI, Src1Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildInstr(Opc)
      .addDef(DefRegs[0])
      .addUse(Src0Regs[0])
      .addUse(Src1Regs[0]);

    B.buildInstr(Opc)
      .addDef(DefRegs[1])
      .addUse(Src0Regs[1])
      .addUse(Src1Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT:
    applyDefaultMapping(OpdMapper);
    executeInWaterfallLoop(MI, MRI, { 2 });
    return;

  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_buffer_load: {
      executeInWaterfallLoop(MI, MRI, { 2 });
      return;
    }
    case Intrinsic::amdgcn_s_buffer_load: {
      executeInWaterfallLoop(MI, MRI, { 2, 3 });
      return;
    }
    default:
      break;
    }
    break;
  }
  default:
    break;
  }

  return applyDefaultMapping(OpdMapper);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  return AMDGPUInstrInfo::isUniformMMO(MMO);
}

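// Note on isInstrUniform above (added commentary, not from the original
// source): "uniform" means the access is known to compute the same address in
// every lane of the wave, e.g. a load through a kernel-argument pointer,
// which is what makes the scalar load path in getInstrMappingForLoad legal.
//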
bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg())
      continue;
    unsigned Reg = MI.getOperand(i).getReg();
    if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
      if (Bank->getID() == AMDGPU::VGPRRegBankID)
        return false;

      assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
             Bank->getID() == AMDGPU::SCCRegBankID);
    }
  }
  return true;
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
    OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
  }
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
  unsigned OpdIdx = 0;

  unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);

  if (MI.getOperand(OpdIdx).isIntrinsicID())
    OpdsMapping[OpdIdx++] = nullptr;

  unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
  unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);

  unsigned DefaultBankID = Size1 == 1 ?
    AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
  unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);

  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);

  for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
    unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

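// Example for getDefaultMappingVOP above (added commentary, not from the
// original source): for
//
//   %d:_(s32) = G_FADD %a:_(s32), %b:_(s32)
//
// this yields { VGPR(32), VGPR(32), VGPR(32) }, while any 1-bit operand
// (e.g. the carry produced by G_UADDO) is mapped to VCC rather than a VGPR.
//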
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 2> OpdsMapping(2);
  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

  const ValueMapping *ValMapping;
  const ValueMapping *PtrMapping;

  if (isInstrUniform(MI)) {
    // We have a uniform instruction so we want to use an SMRD load.
    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
  } else {
    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    // FIXME: What would happen if we used SGPRRegBankID here?
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
  }

  OpdsMapping[0] = ValMapping;
  OpdsMapping[1] = PtrMapping;
  const InstructionMapping &Mapping = getInstructionMapping(
      1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
  return Mapping;

  // FIXME: Do we want to add a mapping for FLAT load, or should we just
  // handle that during instruction selection?
}

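// Example for getInstrMappingForLoad above (added commentary, not from the
// original source): a load through a kernel-argument pointer maps as
// { SGPR(dst), SGPR(ptr) } and can select to a scalar s_load_dword, while a
// load from a divergent address maps as { VGPR(dst), VGPR(ptr) } and must use
// a vector memory instruction.
//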
unsigned
AMDGPURegisterBankInfo::getRegBankID(unsigned Reg,
                                     const MachineRegisterInfo &MRI,
                                     const TargetRegisterInfo &TRI,
                                     unsigned Default) const {

  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
  return Bank ? Bank->getID() : Default;
}

///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
/// in RegBankSelect::Mode::Fast. Any mapping that would cause a
/// VGPR to SGPR copy to be generated is illegal.
///
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);

  if (Mapping.isValid())
    return Mapping;

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  switch (MI.getOpcode()) {
  default:
    return getInvalidInstructionMapping();

  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (Size == 1) {
      OpdsMapping[0] = OpdsMapping[1] =
        OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
      break;
    }

    if (Size == 64) {
      if (isSALUMapping(MI)) {
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
        OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
      } else {
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
        unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
        OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);

        unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
        OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
      }

      break;
    }

    LLVM_FALLTHROUGH;
  }

  case AMDGPU::G_GEP:
  case AMDGPU::G_ADD:
  case AMDGPU::G_SUB:
  case AMDGPU::G_MUL:
  case AMDGPU::G_SHL:
  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR:
  case AMDGPU::G_UADDO:
  case AMDGPU::G_SADDO:
  case AMDGPU::G_USUBO:
  case AMDGPU::G_SSUBO:
  case AMDGPU::G_UADDE:
  case AMDGPU::G_SADDE:
  case AMDGPU::G_USUBE:
  case AMDGPU::G_SSUBE:
  case AMDGPU::G_UMULH:
  case AMDGPU::G_SMULH:
    if (isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    LLVM_FALLTHROUGH;

  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FPTOSI:
  case AMDGPU::G_FPTOUI:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FSQRT:
  case AMDGPU::G_SITOFP:
  case AMDGPU::G_UITOFP:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_FEXP2:
  case AMDGPU::G_FLOG2:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_INTRINSIC_ROUND:
    return getDefaultMappingVOP(MI);
  case AMDGPU::G_IMPLICIT_DEF: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_FCONSTANT:
  case AMDGPU::G_CONSTANT:
  case AMDGPU::G_FRAME_INDEX:
  case AMDGPU::G_BLOCK_ADDR: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_INSERT: {
    unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
                                          AMDGPU::VGPRRegBankID;
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
    OpdsMapping[3] = nullptr;
    break;
  }
  case AMDGPU::G_EXTRACT: {
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = nullptr;
    break;
  }
  case AMDGPU::G_MERGE_VALUES: {
    unsigned Bank = isSALUMapping(MI) ?
      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    // Op1 and Dst should use the same register bank.
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_INTTOPTR:
  case AMDGPU::G_PTRTOINT:
  case AMDGPU::G_CTLZ:
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
  case AMDGPU::G_CTTZ:
  case AMDGPU::G_CTTZ_ZERO_UNDEF:
  case AMDGPU::G_CTPOP:
  case AMDGPU::G_BSWAP:
  case AMDGPU::G_FABS:
  case AMDGPU::G_FNEG: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
    OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
    break;
  }
  case AMDGPU::G_TRUNC: {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned Src = MI.getOperand(1).getReg();
    unsigned Bank = getRegBankID(Src, MRI, *TRI);
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_ZEXT:
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ANYEXT: {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned Src = MI.getOperand(1).getReg();
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
    unsigned SrcBank = getRegBankID(Src, MRI, *TRI,
                                    SrcSize == 1 ? AMDGPU::SGPRRegBankID :
                                    AMDGPU::VGPRRegBankID);
    unsigned DstBank = SrcBank;
    if (SrcSize == 1) {
      if (SrcBank == AMDGPU::SGPRRegBankID)
        DstBank = AMDGPU::VGPRRegBankID;
      else
        DstBank = AMDGPU::SGPRRegBankID;
    }

    OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank, SrcSize);
    break;
  }
  case AMDGPU::G_FCMP: {
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
    OpdsMapping[1] = nullptr; // Predicate Operand.
    OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_STORE: {
    assert(MI.getOperand(0).isReg());
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // FIXME: We need to specify a different reg bank once scalar stores
    // are supported.
    const ValueMapping *ValMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    // FIXME: Depending on the type of store, the pointer could be in
    // the SGPR Reg bank.
    // FIXME: Pointer size should be based on the address space.
    const ValueMapping *PtrMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);

    OpdsMapping[0] = ValMapping;
    OpdsMapping[1] = PtrMapping;
    break;
  }

  case AMDGPU::G_ICMP: {
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
    unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
                       Op3Bank == AMDGPU::SGPRRegBankID ?
                       AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
    OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
    OpdsMapping[1] = nullptr; // Predicate Operand.
    OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
    break;
  }

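  // Note on G_ICMP above (added commentary, not from the original source): a
  // compare of two SGPR operands can run on the scalar ALU and produce the
  // 1-bit SCC result; if either operand is divergent, the compare must run on
  // the VALU and produce a per-lane mask in VCC instead.
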
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
    unsigned OutputBankID = isSALUMapping(MI) ?
                            AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);

    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);

    // The index can be in either bank if the source vector is a VGPR.
    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
    break;
  }
  case AMDGPU::G_INSERT_VECTOR_ELT: {
    unsigned OutputBankID = isSALUMapping(MI) ?
                            AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
    unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
    unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);

    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);

    // The index can be in either bank if the source vector is a VGPR.
    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
    break;
  }
  case AMDGPU::G_UNMERGE_VALUES: {
    unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
                                        AMDGPU::VGPRRegBankID;

    // Op1 and Dst should use the same register bank.
    // FIXME: Shouldn't this be the default? Why do we need to handle this?
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
    }
    break;
  }
  case AMDGPU::G_INTRINSIC: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    default:
      return getInvalidInstructionMapping();
    case Intrinsic::maxnum:
    case Intrinsic::minnum:
    case Intrinsic::amdgcn_cvt_pkrtz:
      return getDefaultMappingVOP(MI);
    case Intrinsic::amdgcn_kernarg_segment_ptr: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_wqm_vote: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = OpdsMapping[2]
        = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    default:
      return getInvalidInstructionMapping();
    case Intrinsic::amdgcn_exp_compr:
      OpdsMapping[0] = nullptr; // IntrinsicID
      // FIXME: These are immediate values which can't be read from registers.
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      // FIXME: Could we support packed types here?
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      // FIXME: These are immediate values which can't be read from registers.
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      break;
    case Intrinsic::amdgcn_exp:
      OpdsMapping[0] = nullptr; // IntrinsicID
      // FIXME: These are immediate values which can't be read from registers.
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      // FIXME: Could we support packed types here?
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      // FIXME: These are immediate values which can't be read from registers.
      OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      break;
    case Intrinsic::amdgcn_buffer_load: {
      unsigned RSrc = MI.getOperand(2).getReg();   // SGPR
      unsigned VIndex = MI.getOperand(3).getReg(); // VGPR
      unsigned Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm

      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
      unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
      unsigned Size4 = MRI.getType(Offset).getSizeInBits();

      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);

      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
      OpdsMapping[1] = nullptr; // intrinsic id

      // Lie and claim everything is legal, even though some need to be
      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
      OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
      OpdsMapping[5] = nullptr;
      OpdsMapping[6] = nullptr;
      break;
    }
    case Intrinsic::amdgcn_s_buffer_load: {
      unsigned RSrc = MI.getOperand(2).getReg();   // SGPR
      unsigned Offset = MI.getOperand(3).getReg(); // SGPR/imm

      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
      unsigned Size3 = MRI.getType(Offset).getSizeInBits();

      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);

      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
      OpdsMapping[1] = nullptr; // intrinsic id

      // Lie and claim everything is legal, even though some need to be
      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
      OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
      OpdsMapping[4] = nullptr;
      break;
    }
    }

    break;
  }
  case AMDGPU::G_SELECT: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
                                    AMDGPU::SGPRRegBankID);
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
    bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
                    Op2Bank == AMDGPU::SGPRRegBankID &&
                    Op3Bank == AMDGPU::SGPRRegBankID;
    unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
    Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;

    if (Size == 64) {
      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
      OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
      OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
    } else {
      OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
      OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
    }

    break;
  }

  case AMDGPU::G_LOAD:
    return getInstrMappingForLoad(MI);

  case AMDGPU::G_ATOMICRMW_XCHG:
  case AMDGPU::G_ATOMICRMW_ADD:
  case AMDGPU::G_ATOMICRMW_SUB:
  case AMDGPU::G_ATOMICRMW_AND:
  case AMDGPU::G_ATOMICRMW_OR:
  case AMDGPU::G_ATOMICRMW_XOR:
  case AMDGPU::G_ATOMICRMW_MAX:
  case AMDGPU::G_ATOMICRMW_MIN:
  case AMDGPU::G_ATOMICRMW_UMAX:
  case AMDGPU::G_ATOMICRMW_UMIN:
  case AMDGPU::G_ATOMIC_CMPXCHG: {
    return getDefaultMappingAllVGPR(MI);
  }
  case AMDGPU::G_BRCOND: {
    unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
                                 AMDGPU::SGPRRegBankID);
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
    if (Bank != AMDGPU::SCCRegBankID)
      Bank = AMDGPU::VCCRegBankID;

    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
    break;
  }
  }

  return getInstructionMapping(/*ID*/1, /*Cost*/1,
                               getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getReg(unsigned Idx) const
Get the register for the operand index.
Interface definition for SIRegisterInfo.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:491
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
unsigned getScalarSizeInBits() const
Helper class that represents how the value of an instruction may be mapped and what is the related co...
void push_back(const T &Elt)
Definition: SmallVector.h:211
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
unsigned getReg() const
getReg - Returns the register number.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
void setRegBank(unsigned Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
bool isIntrinsicID() const
bool isVector() const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
iterator_range< SmallVectorImpl< unsigned >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
A description of a memory reference used in the backend.
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
bool isSGPRClass(const TargetRegisterClass *RC) const
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const RegisterBank * RegBank
Register bank where the partial value lives.
static bool isInstrUniform(const MachineInstr &MI)
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< unsigned > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
MachineRegisterInfo * getMRI()
Getter for MRI.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
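With only an opcode, the builder yields an empty instruction to which operands are appended; a hedged sketch (register names invented):
  MachineIRBuilder B(MI);              // insertion point: right before MI
  B.buildInstr(TargetOpcode::G_AND)
      .addDef(DstReg)
      .addUse(LHSReg)
      .addUse(RHSReg);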
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&... args)
Zip iterator for two or more iterable types.
Definition: STLExtras.h:660
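For example, walking two equally long register lists in lock-step (a sketch; B is a MachineIRBuilder and both lists are hypothetical):
  for (auto Pair : zip(DstRegs, SrcRegs))
    B.buildCopy(std::get<0>(Pair), std::get<1>(Pair));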
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:134
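A short sketch of the usual insert/count idiom (the set name is invented):
  SmallSet<unsigned, 4> SeenRegs;
  for (const MachineOperand &MO : MI.uses())
    if (MO.isReg())
      SeenRegs.insert(MO.getReg()); // duplicates are ignored
  bool AnyRegUses = !SeenRegs.empty();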
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Helper class to build MachineInstr.
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned Length
Length of this mapping in bits.
void setType(unsigned VReg, LLT Ty)
Set the low-level type of VReg to Ty.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:548
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
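A hedged sketch of the single-partial-mapping case from inside a RegisterBankInfo subclass:
  // Map one 32-bit value entirely onto the VGPR bank.
  const RegisterBankInfo::ValueMapping &VM =
      getValueMapping(/*StartIdx=*/0, /*Length=*/32, AMDGPU::VGPRRegBank);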
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:480
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1237
bool isValid() const
Check whether this object is valid.
unsigned createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
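Creating a scratch vreg and pinning its bank, as a sketch (Tmp is hypothetical):
  unsigned Tmp = MRI.createGenericVirtualRegister(LLT::scalar(32));
  MRI.setRegBank(Tmp, AMDGPU::VGPRRegBank); // see setRegBank above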
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand when applying a mapping.
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
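For example, reassembling a 64-bit value from two 32-bit halves (Lo, Hi, and DstReg are illustrative):
  B.buildMerge(DstReg, {Lo, Hi}); // DstReg = G_MERGE_VALUES Lo, Hi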
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
static LLT getHalfSizedType(LLT Ty)
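Given the name, a plausible implementation as a sketch (not necessarily verbatim from the file):
  static LLT getHalfSizedType(LLT Ty) {
    if (Ty.isVector()) {
      assert(Ty.getNumElements() % 2 == 0);
      return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
    }
    assert(Ty.getSizeInBits() % 2 == 0);
    return LLT::scalar(Ty.getSizeInBits() / 2);
  }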
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:841
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
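A one-line usage sketch (all registers hypothetical):
  B.buildSelect(DstReg, CondReg, TrueReg, FalseReg); // DstReg = Cond ? True : False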
This class implements the register bank concept.
Definition: RegisterBank.h:28
Helper struct that represents how a value is mapped through different register banks.
This file declares the MachineIRBuilder class.
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override
Get a register bank that covers RC.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
void setMBB(MachineBasicBlock &MBB)
Set the insertion point to the end of MBB.
static const TargetRegisterClass * constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
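A hedged usage sketch; the register class choice is illustrative:
  if (!RegisterBankInfo::constrainGenericRegister(DstReg,
                                                  AMDGPU::VGPR_32RegClass, MRI))
    llvm_unreachable("failed to constrain register");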
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void setSimpleHint(unsigned VReg, unsigned PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual register.
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
This class provides the information for the target register banks.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr & getMI() const
Helper struct that represents how a value is mapped through different register banks.
void insert(iterator MBBI, MachineBasicBlock *MBB)
unsigned NumBreakDowns
Number of partial mappings used to break down this value.
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1226
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164