//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"

#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AMDGPUGenRegisterBankInfo.def"

using namespace llvm;

namespace {

// Observer to apply a register bank to new registers created by
// LegalizerHelper.
class ApplyRegBankMapping final : public GISelChangeObserver {
private:
  MachineRegisterInfo &MRI;
  const RegisterBank *NewBank;
  SmallVector<MachineInstr *, 4> NewInsts;

public:
  ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
    : MRI(MRI_), NewBank(RB) {}

  ~ApplyRegBankMapping() {
    for (MachineInstr *MI : NewInsts)
      applyBank(*MI);
  }

  /// Set any registers that don't have a set register class or bank to SALU.
  void applyBank(MachineInstr &MI) {
    for (MachineOperand &Op : MI.operands()) {
      if (!Op.isReg())
        continue;

      Register Reg = Op.getReg();
      if (MRI.getRegClassOrRegBank(Reg))
        continue;

      const RegisterBank *RB = NewBank;
      // FIXME: This might not be enough to detect when SCC should be used.
      if (MRI.getType(Reg) == LLT::scalar(1))
        RB = (NewBank == &AMDGPU::SGPRRegBank ?
              &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);

      MRI.setRegBank(Reg, *RB);
    }
  }

  void erasingInstr(MachineInstr &MI) override {}

  void createdInstr(MachineInstr &MI) override {
    // At this point, the instruction was just inserted and has no operands.
    NewInsts.push_back(&MI);
  }

  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {}
};

} // anonymous namespace

AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
    : AMDGPUGenRegisterBankInfo(),
      TRI(static_cast<const SIRegisterInfo*>(&TRI)) {

  // HACK: Until this is fully tablegen'd.
  static bool AlreadyInit = false;
  if (AlreadyInit)
    return;

  AlreadyInit = true;

  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
  (void)RBSGPR;
  assert(&RBSGPR == &AMDGPU::SGPRRegBank);

  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
  (void)RBVGPR;
  assert(&RBVGPR == &AMDGPU::VGPRRegBank);

}

unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                          const RegisterBank &Src,
                                          unsigned Size) const {
  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      Src.getID() == AMDGPU::VGPRRegBankID) {
    return std::numeric_limits<unsigned>::max();
  }

  // Bool values are tricky, because the meaning is based on context. The SCC
  // and VCC banks are for the natural scalar and vector conditions produced by
  // a compare.
  //
  // Legalization doesn't know about the necessary context, so an s1 use may
  // have been a truncate from an arbitrary value, in which case a copy (lowered
  // as a compare with 0) needs to be inserted.
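  //
  // For example (illustrative MIR, not from a real test): an s1 produced by
  //   %c:vgpr(s1) = G_TRUNC %v:vgpr(s32)
  // cannot simply be copied into an SCC/SGPR bank; the condition would have
  // to be rematerialized (e.g. by comparing against 0), so such copies are
  // made prohibitively expensive below rather than modeled as cheap copies.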
  if (Size == 1 &&
      (Dst.getID() == AMDGPU::SCCRegBankID ||
       Dst.getID() == AMDGPU::SGPRRegBankID) &&
      (Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  if (Dst.getID() == AMDGPU::SCCRegBankID &&
      Src.getID() == AMDGPU::VCCRegBankID)
    return std::numeric_limits<unsigned>::max();

  return RegisterBankInfo::copyCost(Dst, Src, Size);
}

unsigned AMDGPURegisterBankInfo::getBreakDownCost(
  const ValueMapping &ValMapping,
  const RegisterBank *CurBank) const {
  // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
  // VGPR.
  // FIXME: Is there a better way to do this?
  if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown[0].Length >= 64)
    return 10; // This is expensive.

  assert(ValMapping.NumBreakDowns == 2 &&
         ValMapping.BreakDown[0].Length == 32 &&
         ValMapping.BreakDown[0].StartIdx == 0 &&
         ValMapping.BreakDown[1].Length == 32 &&
         ValMapping.BreakDown[1].StartIdx == 32 &&
         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);

  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
  // want.

  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
  // alignment restrictions, but this probably isn't important.
  return 1;
}

const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
    const TargetRegisterClass &RC) const {

  if (TRI->isSGPRClass(&RC))
    return getRegBank(AMDGPU::SGPRRegBankID);

  return getRegBank(AMDGPU::VGPRRegBankID);
}

template <unsigned NumOps>
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::addMappingFromTable(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const std::array<unsigned, NumOps> RegSrcOpIdx,
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {

  InstructionMappings AltMappings;

  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());

  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
  }

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
  }

  // getInstrMapping's default mapping uses ID 1, so start at 2.
  unsigned MappingID = 2;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
    }

    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
                                                 getOperandsMapping(Operands),
                                                 Operands.size()));
  }

  return AltMappings;
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_readlane: {
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Need a readfirstlane for the index.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_writelane: {
    static const OpRegBankEntry<4> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need readfirstlane of first op
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of second op
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of both ops
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
    };

    // dst, value, lane index, previous value
    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {

  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_buffer_load: {
    static const OpRegBankEntry<3> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Waterfall loop needed for rsrc. In the worst case this will execute
      // approximately an extra 10 * wavesize + 2 instructions.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
    };

    // rsrc, voffset, offset
    const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_buffer_load: {
    static const OpRegBankEntry<2> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Only need 1 register in loop
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

      // Have to waterfall the resource.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

      // Have to waterfall the resource, and the offset.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
    };

    // rsrc, offset
    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
    // VGPR = M0, VGPR
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need a readfirstlane for m0
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_sendmsg:
  case Intrinsic::amdgcn_s_sendmsghalt: {
    static const OpRegBankEntry<1> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID }, 1 },

      // Need readlane
      { { AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  return AMDGPUInstrInfo::isUniformMMO(MMO);
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  InstructionMappings AltMappings;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_GLOBAL_VALUE: {
    static const OpRegBankEntry<1> Table[2] = {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 }
    };

    return addMappingFromTable<1>(MI, MRI, { 0 }, Table);
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    if (Size == 1) {
      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
      const InstructionMapping &SCCMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SCCMapping);

      const InstructionMapping &SGPRMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SGPRMapping);

      const InstructionMapping &VCCMapping0 = getInstructionMapping(
        2, 10, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&VCCMapping0);
      return AltMappings;
    }

    if (Size != 64)
      break;

    const InstructionMapping &SSMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 2, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VVMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(
      3, 3, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SVMapping);

    // SGPR in LHS is slightly preferable, so make VS more expensive than SV.
    const InstructionMapping &VSMapping = getInstructionMapping(
      3, 4, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VSMapping);
    break;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
    // FIXME: Should we be hard coding the size for these mappings?
    if (isInstrUniform(MI)) {
      const InstructionMapping &SSMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
        2); // Num Operands
      AltMappings.push_back(&SSMapping);
    }

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 1, getOperandsMapping(
        {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
         AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.push_back(&VVMapping);

    // It may be possible to have a vgpr = load sgpr mapping here, because
    // the mubuf instructions support this kind of load, but probably for only
    // gfx7 and older. However, the addressing mode matching in the instruction
    // selector should be able to do a better job of detecting and selecting
    // these kinds of loads from the vgpr = load vgpr mapping.

    return AltMappings;

  }
  case TargetOpcode::G_ICMP: {
    unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SVMapping);

    const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SELECT: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    static const OpRegBankEntry<3> Table[4] = {
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Scalar requires cmp+select, and extends if 16-bit.
      // FIXME: Should there be separate costs for 32 and 16-bit?
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&VVMapping);
    return AltMappings;
  }
  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

    const InstructionMapping &SMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&SMapping);

    const InstructionMapping &VMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
      2); // Num Operands
    AltMappings.push_back(&VMapping);
    return AltMappings;
  }
  case AMDGPU::G_INTRINSIC:
    return getInstrAlternativeMappingsIntrinsic(MI, MRI);
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AMDGPURegisterBankInfo::split64BitValueForMapping(
  MachineIRBuilder &B,
  SmallVector<Register, 2> &Regs,
  LLT HalfTy,
  Register Reg) const {
  assert(HalfTy.getSizeInBits() == 32);
  MachineRegisterInfo *MRI = B.getMRI();
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);

  Regs.push_back(LoLHS);
  Regs.push_back(HiLHS);

  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
    .addDef(LoLHS)
    .addDef(HiLHS)
    .addUse(Reg);
}

/// Replace the current type each register in \p Regs has with \p NewTy
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
                          LLT NewTy) {
  for (Register Reg : Regs) {
    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
    MRI.setType(Reg, NewTy);
  }
}

static LLT getHalfSizedType(LLT Ty) {
  if (Ty.isVector()) {
    assert(Ty.getNumElements() % 2 == 0);
    return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
  }

  assert(Ty.getSizeInBits() % 2 == 0);
  return LLT::scalar(Ty.getSizeInBits() / 2);
}

/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
/// execute the instruction for each unique combination of values in all lanes
/// in the wave. The block will be split such that rest of the instructions are
/// moved to a new block.
///
/// Essentially performs this loop:
///
/// Save Execution Mask
/// For (Lane : Wavefront) {
///   Enable Lane, Disable all other lanes
///   SGPR = read SGPR value for current lane from VGPR
///   VGPRResult[Lane] = use_op SGPR
/// }
/// Restore Execution Mask
///
/// There is additional complexity to try for compare values to identify the
/// unique values used.
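///
/// The control flow produced below (a sketch for orientation, matching the
/// block construction in this function) is:
///
///   MBB:           original block, now ending just before \p MI
///   LoopBB:        readfirstlane + compare + \p MI; branches back to itself
///                  while any lanes remain to be processed
///   RestoreExecBB: restores the saved exec mask
///   RemainderBB:   the rest of the original block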
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  ArrayRef<unsigned> OpIndices) const {
  MachineFunction *MF = MI.getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock::iterator I(MI);

  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Use a set to avoid extra readfirstlanes in the case where multiple operands
  // are the same register.
  SmallSet<Register, 4> SGPROperandRegs;
  for (unsigned Op : OpIndices) {
    assert(MI.getOperand(Op).isUse());
    Register Reg = MI.getOperand(Op).getReg();
    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
    if (OpBank->getID() == AMDGPU::VGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  if (SGPROperandRegs.empty())
    return;

  MachineIRBuilder B(MI);
  SmallVector<Register, 4> ResultRegs;
  SmallVector<Register, 4> InitResultRegs;
  SmallVector<Register, 4> PhiRegs;
  for (MachineOperand &Def : MI.defs()) {
    LLT ResTy = MRI.getType(Def.getReg());
    const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
    ResultRegs.push_back(Def.getReg());
    Register InitReg = B.buildUndef(ResTy).getReg(0);
    Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
    InitResultRegs.push_back(InitReg);
    PhiRegs.push_back(PhiReg);
    MRI.setRegBank(PhiReg, *DefBank);
    MRI.setRegBank(InitReg, *DefBank);
  }

  Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  // To insert the loop we need to split the block. Move everything before this
  // point to a new block, and insert a new empty block before this instruction.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, RestoreExecBB);
  MF->insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(RestoreExecBB);
  LoopBB->addSuccessor(LoopBB);

  // Move the rest of the block into a new block.
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());

  MBB.addSuccessor(LoopBB);
  RestoreExecBB->addSuccessor(RemainderBB);

  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
    .addDef(PhiExec)
    .addReg(InitSaveExecReg)
    .addMBB(&MBB)
    .addReg(NewExec)
    .addMBB(LoopBB);

  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
    B.buildInstr(TargetOpcode::G_PHI)
      .addDef(std::get<2>(Result))
      .addReg(std::get<0>(Result)) // Initial value / implicit_def
      .addMBB(&MBB)
      .addReg(std::get<1>(Result)) // Mid-loop value.
      .addMBB(LoopBB);
  }

  // Move the instruction into the loop.
  LoopBB->splice(LoopBB->end(), &MBB, I);
  I = std::prev(LoopBB->end());

  B.setInstr(*I);

  Register CondReg;

  for (MachineOperand &Op : MI.uses()) {
    if (!Op.isReg())
      continue;

    assert(!Op.isDef());
    if (SGPROperandRegs.count(Op.getReg())) {
      LLT OpTy = MRI.getType(Op.getReg());
      unsigned OpSize = OpTy.getSizeInBits();

      // Can only do a readlane of 32-bit pieces.
      if (OpSize == 32) {
        // Avoid extra copies in the simple case of one 32-bit register.
        Register CurrentLaneOpReg =
            MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        MRI.setType(CurrentLaneOpReg, OpTy);

        constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
        // Read the next variant <- also loop target.
        BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                CurrentLaneOpReg)
          .addReg(Op.getReg());

        Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
        bool First = CondReg == AMDGPU::NoRegister;
        if (First)
          CondReg = NewCondReg;

        // Compare the just read M0 value to all possible Idx values.
        B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
          .addDef(NewCondReg)
          .addReg(CurrentLaneOpReg)
          .addReg(Op.getReg());
        Op.setReg(CurrentLaneOpReg);

        if (!First) {
          Register AndReg =
              MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

          // If there are multiple operands to consider, AND the conditions
          // together.
          B.buildInstr(AMDGPU::S_AND_B64)
            .addDef(AndReg)
            .addReg(NewCondReg)
            .addReg(CondReg);
          CondReg = AndReg;
        }
      } else {
        LLT S32 = LLT::scalar(32);
        SmallVector<Register, 8> ReadlanePieces;

        // The compares can be done as 64-bit, but the extract needs to be done
        // in 32-bit pieces.

        bool Is64 = OpSize % 64 == 0;

        LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
        unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
                                          : AMDGPU::V_CMP_EQ_U32_e64;

        // Insert the unmerge before the loop.

        B.setMBB(MBB);
        auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
        B.setInstr(*I);

        unsigned NumPieces = Unmerge->getNumOperands() - 1;
        for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
          Register UnmergePiece = Unmerge.getReg(PieceIdx);

          Register CurrentLaneOpReg;
          if (Is64) {
            Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
            Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);

            MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
            MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
            MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegLo)
              .addReg(UnmergePiece, 0, AMDGPU::sub0);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegHi)
              .addReg(UnmergePiece, 0, AMDGPU::sub1);

            CurrentLaneOpReg =
              B.buildMerge(LLT::scalar(64),
                           {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
               .getReg(0);

            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);

            if (OpTy.getScalarSizeInBits() == 64) {
              // If we need to produce a 64-bit element vector, use the
              // merged pieces.
              ReadlanePieces.push_back(CurrentLaneOpReg);
            } else {
              // 32-bit element type.
              ReadlanePieces.push_back(CurrentLaneOpRegLo);
              ReadlanePieces.push_back(CurrentLaneOpRegHi);
            }
          } else {
            CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
            MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpReg)
              .addReg(UnmergePiece);
            ReadlanePieces.push_back(CurrentLaneOpReg);
          }

          Register NewCondReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
          bool First = CondReg == AMDGPU::NoRegister;
          if (First)
            CondReg = NewCondReg;

          B.buildInstr(CmpOp)
            .addDef(NewCondReg)
            .addReg(CurrentLaneOpReg)
            .addReg(UnmergePiece);

          if (!First) {
            Register AndReg
              = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

            // If there are multiple operands to consider, AND the conditions
            // together.
            B.buildInstr(AMDGPU::S_AND_B64)
              .addDef(AndReg)
              .addReg(NewCondReg)
              .addReg(CondReg);
            CondReg = AndReg;
          }
        }

        // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
        // BUILD_VECTOR
        if (OpTy.isVector()) {
          auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        } else {
          auto Merge = B.buildMerge(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        }

        MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
      }
    }
  }

  B.setInsertPt(*LoopBB, LoopBB->end());

  // Update EXEC, save the original EXEC value to VCC.
  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
    .addDef(NewExec)
    .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(AMDGPU::S_XOR_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
    .addReg(NewExec);

  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
  // s_cbranch_scc0?

  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
    .addMBB(LoopBB);

  // Save the EXEC mask before the loop.
  BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
    .addReg(AMDGPU::EXEC);

  // Restore the EXEC mask after the loop.
  B.setMBB(*RestoreExecBB);
  B.buildInstr(AMDGPU::S_MOV_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(SaveExecReg);
}

// Legalize an operand that must be an SGPR by inserting a readfirstlane.
void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
    MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
  Register Reg = MI.getOperand(OpIdx).getReg();
  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
  if (Bank != &AMDGPU::VGPRRegBank)
    return;

  MachineIRBuilder B(MI);
  Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
    .addDef(SGPR)
    .addReg(Reg);

  const TargetRegisterClass *Constrained =
      constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
  (void)Constrained;
  assert(Constrained && "Failed to constrain readfirstlane src reg");

  MI.getOperand(OpIdx).setReg(SGPR);
}

// When regbankselect repairs registers, it will insert a repair instruction
// which defines the repaired register. Then it calls applyMapping and expects
// that the targets will either delete or rewrite the instruction that
// originally wrote to the repaired registers. Because of this, we end up in a
// situation where we have 2 instructions defining the same registers.
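//
// Illustrative example (not from a real test) of the resulting double
// definition:
//   %dst = G_LOAD ...    ; original instruction
//   %dst = COPY %fixed   ; repair instruction inserted by regbankselect
// getOtherVRegDef below returns whichever def is not \p MI.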
static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
                                     Register Reg,
                                     const MachineInstr &MI) {
  // Is there some way we can assert that there are exactly 2 def instructions?
  for (MachineInstr &Other : MRI.def_instructions(Reg)) {
    if (&Other != &MI)
      return &Other;
  }

  return nullptr;
}

bool AMDGPURegisterBankInfo::applyMappingWideLoad(
    MachineInstr &MI,
    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
    MachineRegisterInfo &MRI) const {
  Register DstReg = MI.getOperand(0).getReg();
  const LLT LoadTy = MRI.getType(DstReg);
  unsigned LoadSize = LoadTy.getSizeInBits();
  const unsigned MaxNonSmrdLoadSize = 128;
  // 128-bit loads are supported for all instruction types.
  if (LoadSize <= MaxNonSmrdLoadSize)
    return false;

  SmallVector<unsigned, 16> DefRegs(OpdMapper.getVRegs(0));
  SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));

  // If the pointer is an SGPR, we have nothing to do.
  if (SrcRegs.empty())
    return false;

  assert(LoadSize % MaxNonSmrdLoadSize == 0);

  // We want to get the repair instruction now, because it will help us
  // determine which instruction the legalizer inserts that will also
  // write to DstReg.
  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);

  // RegBankSelect only emits scalar types, so we need to reset the pointer
  // operand to a pointer type.
  Register BasePtrReg = SrcRegs[0];
  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);

  MachineIRBuilder B(MI);

  unsigned SplitElts =
      MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
  const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType());
  ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&O);
  B.setChangeObserver(Observer);
  LegalizerHelper Helper(B.getMF(), Observer, B);
  if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
    return false;

  // At this point, the legalizer has split the original load into smaller
  // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
  // that combines the outputs of the lower loads and writes it to DstReg.
  // The register bank selector has also added the RepairInst which writes to
  // DstReg as well.

  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);

  // Replace the output of the LegalizedInst with a temporary register, since
  // RepairInst already defines DstReg.
  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
  LegalizedInst->getOperand(0).setReg(TmpReg);
  B.setInsertPt(*RepairInst->getParent(), RepairInst);

  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
    Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
    B.buildConstant(IdxReg, DefIdx);
    MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
  }

  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
  return true;
}

// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
static void substituteSimpleCopyRegs(
  const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
  SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
  if (!SrcReg.empty()) {
    assert(SrcReg.size() == 1);
    OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
  }
}

void AMDGPURegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  unsigned Opc = MI.getOpcode();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  switch (Opc) {
  case AMDGPU::G_SELECT: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
    SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src1Regs.empty() && Src2Regs.empty());
      break;
    }

    MachineIRBuilder B(MI);
    if (Src0Regs.empty())
      Src0Regs.push_back(MI.getOperand(1).getReg());
    else {
      assert(Src0Regs.size() == 1);
    }

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else {
      setRegsToType(MRI, Src1Regs, HalfTy);
    }

    if (Src2Regs.empty())
      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
    else
      setRegsToType(MRI, Src2Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
    B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
    // there is a VGPR input.
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);
    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src0Regs.empty() && Src1Regs.empty());
      break;
    }

    assert(DefRegs.size() == 2);
    assert(Src0Regs.size() == Src1Regs.size() &&
           (Src0Regs.empty() || Src0Regs.size() == 2));

    // Depending on where the source registers came from, the generic code may
    // have decided to split the inputs already or not. If not, we still need to
    // extract the values.
    MachineIRBuilder B(MI);

    if (Src0Regs.empty())
      split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
    else
      setRegsToType(MRI, Src0Regs, HalfTy);

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else
      setRegsToType(MRI, Src1Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildInstr(Opc)
      .addDef(DefRegs[0])
      .addUse(Src0Regs[0])
      .addUse(Src1Regs[0]);

    B.buildInstr(Opc)
      .addDef(DefRegs[1])
      .addUse(Src0Regs[1])
      .addUse(Src1Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_ADD:
  case AMDGPU::G_SUB:
  case AMDGPU::G_MUL: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::scalar(16))
      break;

    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
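    //
    // Sketch of what widenScalar produces for an SGPR s16 op (illustrative,
    // not literal output):
    //   %a32:sgpr(s32) = G_ANYEXT %a16
    //   %b32:sgpr(s32) = G_ANYEXT %b16
    //   %r32:sgpr(s32) = G_ADD %a32, %b32
    //   %r16:sgpr(s16) = G_TRUNC %r32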
    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
        LegalizerHelper::Legalized)
      llvm_unreachable("widen scalar should have succeeded");
    return;
  }
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    // Turn scalar min/max into a compare and select.
    LLT Ty = MRI.getType(DstReg);
    LLT S32 = LLT::scalar(32);
    LLT S16 = LLT::scalar(16);

    if (Ty == S16) {
      // Need to widen to s32, and expand as cmp + select.
      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("widenScalar should have succeeded");

      // FIXME: This is relying on widenScalar leaving MI in place.
      if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    } else {
      if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    }

    return;
  }
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    bool Signed = Opc == AMDGPU::G_SEXT;

    MachineIRBuilder B(MI);
    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isScalar() &&
        SrcBank != &AMDGPU::SGPRRegBank &&
        SrcBank != &AMDGPU::SCCRegBank &&
        SrcBank != &AMDGPU::VCCRegBank &&
        // FIXME: Should handle any type that round to s64 when irregular
        // breakdowns supported.
        DstTy.getSizeInBits() == 64 &&
        SrcTy.getSizeInBits() <= 32) {
      const LLT S32 = LLT::scalar(32);
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      // Extend to 32-bit, and then extend the low half.
      if (Signed) {
        // TODO: Should really be buildSExtOrCopy
        B.buildSExtOrTrunc(DefRegs[0], SrcReg);

        // Replicate sign bit from 32-bit extended part.
        auto ShiftAmt = B.buildConstant(S32, 31);
        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
      } else {
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
        B.buildConstant(DefRegs[1], 0);
      }

      MRI.setRegBank(DstReg, *SrcBank);
      MI.eraseFromParent();
      return;
    }

    if (SrcTy != LLT::scalar(1))
      return;

    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;

      unsigned DstSize = DstTy.getSizeInBits();
      // 64-bit select is SGPR only
      const bool UseSel64 = DstSize > 32 &&
        SrcBank->getID() == AMDGPU::SCCRegBankID;

      // TODO: Should s16 select be legal?
      LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

      if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
        B.buildSelect(DefRegs[0], SrcReg, True, False);
        B.buildCopy(DefRegs[1], DefRegs[0]);
      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
      } else {
        B.buildSelect(DstReg, SrcReg, True, False);
      }

      MI.eraseFromParent();
      return;
    }

    // Fix up the case with an s1 src that isn't a condition register. Use
    // shifts instead of introducing a compare to avoid an unnecessary
    // condition register (and since there are no scalar 16-bit compares).
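    //
    // That is (with N = destination size in bits):
    //   sext: dst = ashr(shl(anyext(src), N - 1), N - 1)
    //   zext: dst = lshr(shl(anyext(src), N - 1), N - 1)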
    auto Ext = B.buildAnyExt(DstTy, SrcReg);
    auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
    auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);

    if (MI.getOpcode() == AMDGPU::G_SEXT)
      B.buildAShr(DstReg, Shl, ShiftAmt);
    else
      B.buildLShr(DstReg, Shl, ShiftAmt);

    MRI.setRegBank(DstReg, *SrcBank);
    MRI.setRegBank(Ext.getReg(0), *SrcBank);
    MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
    MRI.setRegBank(Shl.getReg(0), *SrcBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT:
    applyDefaultMapping(OpdMapper);
    executeInWaterfallLoop(MI, MRI, { 2 });
    return;
  case AMDGPU::G_INTRINSIC: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_s_buffer_load: {
      // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
      executeInWaterfallLoop(MI, MRI, { 2, 3 });
      return;
    }
    case Intrinsic::amdgcn_readlane: {
      substituteSimpleCopyRegs(OpdMapper, 2);

      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(3)));

      // Make sure the index is an SGPR. It doesn't make sense to run this in a
      // waterfall loop, so assume it's a uniform value.
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    case Intrinsic::amdgcn_writelane: {
      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(2)));
      assert(empty(OpdMapper.getVRegs(3)));

      substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
      constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_buffer_load: {
      executeInWaterfallLoop(MI, MRI, { 2 });
      return;
    }
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      // This is only allowed to execute with 1 lane, so readfirstlane is safe.
      assert(empty(OpdMapper.getVRegs(0)));
      substituteSimpleCopyRegs(OpdMapper, 3);
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // FIXME: Should this use a waterfall loop?
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_LOAD: {
    if (applyMappingWideLoad(MI, OpdMapper, MRI))
      return;
    break;
  }
  default:
    break;
  }

  return applyDefaultMapping(OpdMapper);
}

bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg())
      continue;
    Register Reg = MI.getOperand(i).getReg();
    if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
      if (Bank->getID() == AMDGPU::VGPRRegBankID)
        return false;

      assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
             Bank->getID() == AMDGPU::SCCRegBankID);
    }
  }
  return true;
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
    OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
  }
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
  unsigned OpdIdx = 0;

  unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);

  if (MI.getOperand(OpdIdx).isIntrinsicID())
    OpdsMapping[OpdIdx++] = nullptr;

  Register Reg1 = MI.getOperand(OpdIdx).getReg();
  unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);

  unsigned DefaultBankID = Size1 == 1 ?
    AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
  unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);

  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);

  for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
    const MachineOperand &MO = MI.getOperand(OpdIdx);
    if (!MO.isReg())
      continue;

    unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &Op = MI.getOperand(I);
    if (!Op.isReg())
      continue;

    unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
    OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 2> OpdsMapping(2);
  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

  const ValueMapping *ValMapping;
  const ValueMapping *PtrMapping;

  if (isInstrUniform(MI)) {
    // We have a uniform instruction so we want to use an SMRD load
    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
  } else {
    ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
    // FIXME: What would happen if we used SGPRRegBankID here?
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
  }

  OpdsMapping[0] = ValMapping;
  OpdsMapping[1] = PtrMapping;
  const InstructionMapping &Mapping = getInstructionMapping(
      1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
  return Mapping;

  // FIXME: Do we want to add a mapping for FLAT load, or should we just
  // handle that during instruction selection?
}

unsigned
AMDGPURegisterBankInfo::getRegBankID(Register Reg,
                                     const MachineRegisterInfo &MRI,
                                     const TargetRegisterInfo &TRI,
                                     unsigned Default) const {

  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
  return Bank ? Bank->getID() : Default;
}

///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
/// in RegBankSelect::Mode::Fast. Any mapping that would cause a
/// VGPR to SGPR copy to be generated is illegal.
///
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (MI.isRegSequence()) {
    // If any input is a VGPR, the result must be a VGPR. The default handling
    // assumes any copy between banks is legal.
    unsigned BankID = AMDGPU::SGPRRegBankID;

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
      // It doesn't make sense to use vcc or scc banks here, so just ignore
      // them.
      if (OpBank != AMDGPU::SGPRRegBankID) {
        BankID = AMDGPU::VGPRRegBankID;
        break;
      }
    }
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
    return getInstructionMapping(
        1, /*Cost*/ 1,
        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
  }

  // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
  // properly.
  //
  // TODO: There are additional exec masking dependencies to analyze.
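  //
  // The loop below effectively joins the incoming banks:
  //   scc, scc            -> sgpr (there is only one physical SCC register)
  //   vcc, vcc            -> vcc
  //   vcc mixed with sgpr -> vgpr
  //   any vgpr or unknown -> vgpr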
1546  if (MI.getOpcode() == TargetOpcode::G_PHI) {
1547  // TODO: Generate proper invalid bank enum.
1548  int ResultBank = -1;
1549 
1550  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
1551  Register Reg = MI.getOperand(I).getReg();
1552  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
1553 
1554  // FIXME: Assuming VGPR for any undetermined inputs.
1555  if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
1556  ResultBank = AMDGPU::VGPRRegBankID;
1557  break;
1558  }
1559 
1560  unsigned OpBank = Bank->getID();
1561  // scc, scc -> sgpr
1562  if (OpBank == AMDGPU::SCCRegBankID) {
1563  // There's only one SCC register, so a phi requires copying to SGPR.
1564  OpBank = AMDGPU::SGPRRegBankID;
1565  } else if (OpBank == AMDGPU::VCCRegBankID) {
1566  // vcc, vcc -> vcc
1567  // vcc, sgpr -> vgpr
1568  if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
1569  ResultBank = AMDGPU::VGPRRegBankID;
1570  break;
1571  }
1572  }
1573 
1574  ResultBank = OpBank;
1575  }
1576 
1577  assert(ResultBank != -1);
1578 
1579  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1580 
1581  const ValueMapping &ValMap =
1582  getValueMapping(0, Size, getRegBank(ResultBank));
1583  return getInstructionMapping(
1584  1, /*Cost*/ 1,
1585  /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
1586  }
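 // Worked example for the G_PHI loop above (hypothetical registers): for
 //   %r:_(s1) = G_PHI %a(s1), %bb.0, %b(s1), %bb.1
 // with %a in SGPR and %b in VCC, the first iteration records SGPR, and the
 // second hits the vcc/sgpr mix and resolves ResultBank to VGPR, since such
 // a phi cannot stay in a condition bank.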
1587 
1588  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
1589  if (Mapping.isValid())
1590  return Mapping;
1591 
1592  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
1593 
1594  switch (MI.getOpcode()) {
1595  default:
1596  return getInvalidInstructionMapping();
1597 
1598  case AMDGPU::G_AND:
1599  case AMDGPU::G_OR:
1600  case AMDGPU::G_XOR: {
1601  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1602  if (Size == 1) {
1603  const RegisterBank *DstBank
1604  = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
1605 
1606  unsigned TargetBankID = -1;
1607  unsigned BankLHS = -1;
1608  unsigned BankRHS = -1;
1609  if (DstBank) {
1610  TargetBankID = DstBank->getID();
1611  if (DstBank == &AMDGPU::VCCRegBank) {
1612  TargetBankID = AMDGPU::VCCRegBankID;
1613  BankLHS = AMDGPU::VCCRegBankID;
1614  BankRHS = AMDGPU::VCCRegBankID;
1615  } else if (DstBank == &AMDGPU::SCCRegBank) {
1616  TargetBankID = AMDGPU::SCCRegBankID;
1617  BankLHS = AMDGPU::SGPRRegBankID;
1618  BankRHS = AMDGPU::SGPRRegBankID;
1619  } else {
1620  BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1621  AMDGPU::SGPRRegBankID);
1622  BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
1623  AMDGPU::SGPRRegBankID);
1624  }
1625  } else {
1626  BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1627  AMDGPU::VCCRegBankID);
1628  BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
1629  AMDGPU::VCCRegBankID);
1630 
1631  // Both inputs should be true booleans to produce a boolean result.
1632  if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
1633  TargetBankID = AMDGPU::VGPRRegBankID;
1634  } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
1635  TargetBankID = AMDGPU::VCCRegBankID;
1636  BankLHS = AMDGPU::VCCRegBankID;
1637  BankRHS = AMDGPU::VCCRegBankID;
1638  } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
1639  TargetBankID = AMDGPU::SGPRRegBankID;
1640  } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
1641  // The operation must be done on a 32-bit register, but it will set
1642  // scc. The result type could interchangeably be SCC or SGPR, since
1643  // both values will be produced.
1644  TargetBankID = AMDGPU::SCCRegBankID;
1645  BankLHS = AMDGPU::SGPRRegBankID;
1646  BankRHS = AMDGPU::SGPRRegBankID;
1647  }
1648  }
1649 
1650  OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
1651  OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
1652  OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
1653  break;
1654  }
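 // Summary of the s1 path above: a destination already in VCC or SCC pins
 // both inputs accordingly; otherwise any VGPR input forces a VGPR result,
 // a VCC input forces VCC, and two plain SGPR inputs stay on the SALU.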
1655 
1656  if (Size == 64) {
1657 
1658  if (isSALUMapping(MI)) {
1659  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
1660  OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
1661  } else {
1662  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
1663  unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
1664  OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
1665 
1666  unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
1667  OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
1668  }
1669 
1670  break;
1671  }
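 // The SGPR64Only mappings above reflect that only the SALU has native
 // 64-bit bitwise operations (e.g. S_AND_B64); in the VALU case the 64-bit
 // value is presumably broken into two 32-bit pieces, as the mapping name
 // suggests. Sizes other than 1 and 64 fall through to the shared
 // integer-op handling below.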
1672 
1673  LLVM_FALLTHROUGH;
1674  }
1675 
1676  case AMDGPU::G_GEP:
1677  case AMDGPU::G_ADD:
1678  case AMDGPU::G_SUB:
1679  case AMDGPU::G_MUL:
1680  case AMDGPU::G_SHL:
1681  case AMDGPU::G_LSHR:
1682  case AMDGPU::G_ASHR:
1683  case AMDGPU::G_UADDO:
1684  case AMDGPU::G_SADDO:
1685  case AMDGPU::G_USUBO:
1686  case AMDGPU::G_SSUBO:
1687  case AMDGPU::G_UADDE:
1688  case AMDGPU::G_SADDE:
1689  case AMDGPU::G_USUBE:
1690  case AMDGPU::G_SSUBE:
1691  case AMDGPU::G_UMULH:
1692  case AMDGPU::G_SMULH:
1693  case AMDGPU::G_SMIN:
1694  case AMDGPU::G_SMAX:
1695  case AMDGPU::G_UMIN:
1696  case AMDGPU::G_UMAX:
1697  if (isSALUMapping(MI))
1698  return getDefaultMappingSOP(MI);
1699  LLVM_FALLTHROUGH;
1700 
1701  case AMDGPU::G_FADD:
1702  case AMDGPU::G_FSUB:
1703  case AMDGPU::G_FPTOSI:
1704  case AMDGPU::G_FPTOUI:
1705  case AMDGPU::G_FMUL:
1706  case AMDGPU::G_FMA:
1707  case AMDGPU::G_FSQRT:
1708  case AMDGPU::G_SITOFP:
1709  case AMDGPU::G_UITOFP:
1710  case AMDGPU::G_FPTRUNC:
1711  case AMDGPU::G_FPEXT:
1712  case AMDGPU::G_FEXP2:
1713  case AMDGPU::G_FLOG2:
1714  case AMDGPU::G_FMINNUM:
1715  case AMDGPU::G_FMAXNUM:
1716  case AMDGPU::G_FMINNUM_IEEE:
1717  case AMDGPU::G_FMAXNUM_IEEE:
1718  case AMDGPU::G_FCANONICALIZE:
1719  case AMDGPU::G_INTRINSIC_TRUNC:
1720  case AMDGPU::G_INTRINSIC_ROUND:
1721  return getDefaultMappingVOP(MI);
1722  case AMDGPU::G_IMPLICIT_DEF: {
1723  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1724  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1725  break;
1726  }
1727  case AMDGPU::G_FCONSTANT:
1728  case AMDGPU::G_CONSTANT:
1729  case AMDGPU::G_FRAME_INDEX:
1730  case AMDGPU::G_BLOCK_ADDR: {
1731  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1732  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1733  break;
1734  }
1735  case AMDGPU::G_INSERT: {
1736  unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
1737  AMDGPU::VGPRRegBankID;
1738  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1739  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1740  unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
1741  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1742  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1743  OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
1744  OpdsMapping[3] = nullptr;
1745  break;
1746  }
1747  case AMDGPU::G_EXTRACT: {
1748  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1749  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1750  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1751  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1752  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1753  OpdsMapping[2] = nullptr;
1754  break;
1755  }
1756  case AMDGPU::G_MERGE_VALUES:
1757  case AMDGPU::G_BUILD_VECTOR:
1758  case AMDGPU::G_CONCAT_VECTORS: {
1759  unsigned Bank = isSALUMapping(MI) ?
1760  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1761  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1762  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1763 
1764  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1765  // Op1 and Dst should use the same register bank.
1766  for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
1767  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
1768  break;
1769  }
1770  case AMDGPU::G_BITCAST:
1771  case AMDGPU::G_INTTOPTR:
1772  case AMDGPU::G_PTRTOINT:
1773  case AMDGPU::G_CTLZ:
1774  case AMDGPU::G_CTLZ_ZERO_UNDEF:
1775  case AMDGPU::G_CTTZ:
1776  case AMDGPU::G_CTTZ_ZERO_UNDEF:
1777  case AMDGPU::G_CTPOP:
1778  case AMDGPU::G_BSWAP:
1779  case AMDGPU::G_FABS:
1780  case AMDGPU::G_FNEG: {
1781  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1782  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1783  OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
1784  break;
1785  }
1786  case AMDGPU::G_TRUNC: {
1787  Register Dst = MI.getOperand(0).getReg();
1788  Register Src = MI.getOperand(1).getReg();
1789  unsigned Bank = getRegBankID(Src, MRI, *TRI);
1790  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1791  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1792  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1793  OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
1794  break;
1795  }
1796  case AMDGPU::G_ZEXT:
1797  case AMDGPU::G_SEXT:
1798  case AMDGPU::G_ANYEXT: {
1799  Register Dst = MI.getOperand(0).getReg();
1800  Register Src = MI.getOperand(1).getReg();
1801  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1802  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1803 
1804  unsigned DstBank;
1805  const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
1806  assert(SrcBank);
1807  switch (SrcBank->getID()) {
1808  case AMDGPU::SCCRegBankID:
1809  case AMDGPU::SGPRRegBankID:
1810  DstBank = AMDGPU::SGPRRegBankID;
1811  break;
1812  default:
1813  DstBank = AMDGPU::VGPRRegBankID;
1814  break;
1815  }
1816 
1817  // TODO: Should anyext be split into 32-bit part as well?
1818  if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
1819  OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
1820  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
1821  } else {
1822  // Scalar extend can use 64-bit BFE, but VGPRs require extending to
1823  // 32 bits, and then to 64.
1824  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
1825  OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
1826  SrcSize);
1827  }
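 // Rough example: a uniform G_SEXT from s1 to s64 can be selected as a
 // single 64-bit scalar BFE, so it keeps one 64-bit SGPR part, while the
 // divergent form is mapped as 32-bit VGPR pieces per the comment above.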
1828  break;
1829  }
1830  case AMDGPU::G_FCMP: {
1831  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1832  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1833  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
1834  OpdsMapping[1] = nullptr; // Predicate Operand.
1835  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1836  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1837  break;
1838  }
1839  case AMDGPU::G_STORE: {
1840  assert(MI.getOperand(0).isReg());
1841  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1842  // FIXME: We need to specify a different reg bank once scalar stores
1843  // are supported.
1844  const ValueMapping *ValMapping =
1845  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1846  // FIXME: Depending on the type of store, the pointer could be in
1847  // the SGPR Reg bank.
1848  // FIXME: Pointer size should be based on the address space.
1849  const ValueMapping *PtrMapping =
1850  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
1851 
1852  OpdsMapping[0] = ValMapping;
1853  OpdsMapping[1] = PtrMapping;
1854  break;
1855  }
1856 
1857  case AMDGPU::G_ICMP: {
1858  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1859  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1860  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1861  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1862 
1863  bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
1864  Op3Bank == AMDGPU::SGPRRegBankID &&
1865  (Size == 32 || (Size == 64 &&
1866  (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
1867  MF.getSubtarget<GCNSubtarget>().hasScalarCompareEq64()));
1868 
1869  unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
1870 
1871  OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
1872  OpdsMapping[1] = nullptr; // Predicate Operand.
1873  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1874  OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
1875  break;
1876  }
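 // Example for the G_ICMP mapping above: a 32-bit compare of two SGPR
 // values yields its s1 result in SCC (a scalar S_CMP); any VGPR operand
 // produces a VCC result instead. 64-bit scalar compares only qualify for
 // SCC when the predicate is EQ/NE and the subtarget has scalar 64-bit
 // compare-equal support.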
1877  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
1878  unsigned OutputBankID = isSALUMapping(MI) ?
1879  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1880  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1881  unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1882  unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1883 
1884  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1885  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1886 
1887  // The index can be in either bank if the source vector is in a VGPR.
1888  OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1889  break;
1890  }
1891  case AMDGPU::G_INSERT_VECTOR_ELT: {
1892  unsigned OutputBankID = isSALUMapping(MI) ?
1893  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1894 
1895  unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1896  unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1897  unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
1898  unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1899  unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1900 
1901  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1902  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1903  OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
1904 
1905  // The index can be in either bank if the source vector is in a VGPR.
1906  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1907  break;
1908  }
1909  case AMDGPU::G_UNMERGE_VALUES: {
1910  unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
1911  AMDGPU::VGPRRegBankID;
1912 
1913  // Op1 and Dst should use the same register bank.
1914  // FIXME: Shouldn't this be the default? Why do we need to handle this?
1915  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1916  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
1917  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
1918  }
1919  break;
1920  }
1921  case AMDGPU::G_INTRINSIC: {
1922  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1923  default:
1924  return getInvalidInstructionMapping();
1925  case Intrinsic::amdgcn_div_fmas:
1926  case Intrinsic::amdgcn_trig_preop:
1927  case Intrinsic::amdgcn_sin:
1928  case Intrinsic::amdgcn_cos:
1929  case Intrinsic::amdgcn_log_clamp:
1930  case Intrinsic::amdgcn_rcp:
1931  case Intrinsic::amdgcn_rcp_legacy:
1932  case Intrinsic::amdgcn_rsq:
1933  case Intrinsic::amdgcn_rsq_legacy:
1934  case Intrinsic::amdgcn_rsq_clamp:
1935  case Intrinsic::amdgcn_ldexp:
1936  case Intrinsic::amdgcn_frexp_mant:
1937  case Intrinsic::amdgcn_frexp_exp:
1938  case Intrinsic::amdgcn_fract:
1939  case Intrinsic::amdgcn_cvt_pkrtz:
1940  case Intrinsic::amdgcn_cvt_pknorm_i16:
1941  case Intrinsic::amdgcn_cvt_pknorm_u16:
1942  case Intrinsic::amdgcn_cvt_pk_i16:
1943  case Intrinsic::amdgcn_cvt_pk_u16:
1944  case Intrinsic::amdgcn_fmed3:
1945  case Intrinsic::amdgcn_cubeid:
1946  case Intrinsic::amdgcn_cubema:
1947  case Intrinsic::amdgcn_cubesc:
1948  case Intrinsic::amdgcn_cubetc:
1949  case Intrinsic::amdgcn_sffbh:
1950  case Intrinsic::amdgcn_fmad_ftz:
1951  case Intrinsic::amdgcn_mbcnt_lo:
1952  case Intrinsic::amdgcn_mbcnt_hi:
1953  case Intrinsic::amdgcn_ubfe:
1954  case Intrinsic::amdgcn_sbfe:
1955  case Intrinsic::amdgcn_lerp:
1956  case Intrinsic::amdgcn_sad_u8:
1957  case Intrinsic::amdgcn_msad_u8:
1958  case Intrinsic::amdgcn_sad_hi_u8:
1959  case Intrinsic::amdgcn_sad_u16:
1960  case Intrinsic::amdgcn_qsad_pk_u16_u8:
1961  case Intrinsic::amdgcn_mqsad_pk_u16_u8:
1962  case Intrinsic::amdgcn_mqsad_u32_u8:
1963  case Intrinsic::amdgcn_cvt_pk_u8_f32:
1964  case Intrinsic::amdgcn_alignbit:
1965  case Intrinsic::amdgcn_alignbyte:
1966  case Intrinsic::amdgcn_fdot2:
1967  case Intrinsic::amdgcn_sdot2:
1968  case Intrinsic::amdgcn_udot2:
1969  case Intrinsic::amdgcn_sdot4:
1970  case Intrinsic::amdgcn_udot4:
1971  case Intrinsic::amdgcn_sdot8:
1972  case Intrinsic::amdgcn_udot8:
1973  case Intrinsic::amdgcn_wwm:
1974  case Intrinsic::amdgcn_wqm:
1975  return getDefaultMappingVOP(MI);
1976  case Intrinsic::amdgcn_ds_permute:
1977  case Intrinsic::amdgcn_ds_bpermute:
1978  case Intrinsic::amdgcn_update_dpp:
1979  return getDefaultMappingAllVGPR(MI);
1980  case Intrinsic::amdgcn_kernarg_segment_ptr:
1981  case Intrinsic::amdgcn_s_getpc:
1982  case Intrinsic::amdgcn_groupstaticsize: {
1983  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1984  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1985  break;
1986  }
1987  case Intrinsic::amdgcn_wqm_vote: {
1988  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1989  OpdsMapping[0] = OpdsMapping[2]
1990  = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
1991  break;
1992  }
1993  case Intrinsic::amdgcn_s_buffer_load: {
1994  // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
1995  Register RSrc = MI.getOperand(2).getReg(); // SGPR
1996  Register Offset = MI.getOperand(3).getReg(); // SGPR/imm
1997 
1998  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1999  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
2000  unsigned Size3 = MRI.getType(Offset).getSizeInBits();
2001 
2002  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
2003  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
2004 
2005  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
2006  OpdsMapping[1] = nullptr; // intrinsic id
2007 
2008  // Lie and claim everything is legal, even though some need to be
2009  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
2010  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
2011  OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
2012  OpdsMapping[4] = nullptr;
2013  break;
2014  }
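 // The waterfall loop mentioned above, roughly: V_READFIRSTLANE pulls one
 // candidate value out of the divergent operand, the exec mask is narrowed
 // to the lanes that match it, the SGPR-only operation runs for those
 // lanes, and the loop repeats until every lane has been serviced.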
2015  case Intrinsic::amdgcn_div_scale: {
2016  unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2017  unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2018  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
2019  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
2020 
2021  unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
2022  OpdsMapping[3] = AMDGPU::getValueMapping(
2023  getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
2024  OpdsMapping[4] = AMDGPU::getValueMapping(
2025  getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
2026 
2027  break;
2028  }
2029  case Intrinsic::amdgcn_class: {
2030  Register Src0Reg = MI.getOperand(2).getReg();
2031  Register Src1Reg = MI.getOperand(3).getReg();
2032  unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
2033  unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
2034  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2035  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
2036  OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
2037  Src0Size);
2038  OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
2039  Src1Size);
2040  break;
2041  }
2042  case Intrinsic::amdgcn_icmp:
2043  case Intrinsic::amdgcn_fcmp: {
2044  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2045  // This is not VCCRegBank because this is not used in boolean contexts.
2046  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2047  unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2048  unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2049  unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
2050  OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
2051  OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
2052  break;
2053  }
2054  case Intrinsic::amdgcn_readlane: {
2055  // This must be an SGPR, but accept a VGPR.
2056  Register IdxReg = MI.getOperand(3).getReg();
2057  unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2058  unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2059  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2060  LLVM_FALLTHROUGH;
2061  }
2062  case Intrinsic::amdgcn_readfirstlane: {
2063  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2064  unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2065  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2066  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2067  break;
2068  }
2069  case Intrinsic::amdgcn_writelane: {
2070  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2071  Register SrcReg = MI.getOperand(2).getReg();
2072  unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
2073  unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2074  Register IdxReg = MI.getOperand(3).getReg();
2075  unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2076  unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2077  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2078 
2079  // These two must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
2080  // to legalize.
2081  OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
2082  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2083  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2084  break;
2085  }
2086  case Intrinsic::amdgcn_if_break: {
2087  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
2088  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2089  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
2090  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2091  break;
2092  }
2093  }
2094  break;
2095  }
2096  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
2097  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
2098  default:
2099  return getInvalidInstructionMapping();
2100  case Intrinsic::amdgcn_s_getreg:
2101  case Intrinsic::amdgcn_s_memtime:
2102  case Intrinsic::amdgcn_s_memrealtime:
2103  case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
2104  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2105  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2106  break;
2107  }
2108  case Intrinsic::amdgcn_ds_append:
2109  case Intrinsic::amdgcn_ds_consume:
2110  case Intrinsic::amdgcn_ds_fadd:
2111  case Intrinsic::amdgcn_ds_fmin:
2112  case Intrinsic::amdgcn_ds_fmax:
2113  case Intrinsic::amdgcn_atomic_inc:
2114  case Intrinsic::amdgcn_atomic_dec:
2115  return getDefaultMappingAllVGPR(MI);
2116  case Intrinsic::amdgcn_ds_ordered_add:
2117  case Intrinsic::amdgcn_ds_ordered_swap: {
2118  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2119  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2120  unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2121  AMDGPU::SGPRRegBankID);
2122  OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
2123  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2124  break;
2125  }
2126  case Intrinsic::amdgcn_exp_compr:
2127  OpdsMapping[0] = nullptr; // IntrinsicID
2128  // FIXME: These are immediate values which can't be read from registers.
2129  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2130  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2131  // FIXME: Could we support packed types here?
2132  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2133  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2134  // FIXME: These are immediate values which can't be read from registers.
2135  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2136  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2137  break;
2138  case Intrinsic::amdgcn_exp:
2139  OpdsMapping[0] = nullptr; // IntrinsicID
2140  // FIXME: These are immediate values which can't be read from registers.
2141  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2142  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2143  // FIXME: Could we support packed types here?
2144  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2145  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2146  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2147  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2148  // FIXME: These are immediate values which can't be read from registers.
2149  OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2150  OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2151  break;
2152  case Intrinsic::amdgcn_buffer_load: {
2153  Register RSrc = MI.getOperand(2).getReg(); // SGPR
2154  Register VIndex = MI.getOperand(3).getReg(); // VGPR
2155  Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
2156 
2157  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2158  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
2159  unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
2160  unsigned Size4 = MRI.getType(Offset).getSizeInBits();
2161 
2162  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
2163  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
2164 
2165  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
2166  OpdsMapping[1] = nullptr; // intrinsic id
2167 
2168  // Lie and claim everything is legal, even though some need to be
2169  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
2170  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
2171  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
2172  OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
2173  OpdsMapping[5] = nullptr;
2174  OpdsMapping[6] = nullptr;
2175  break;
2176  }
2177  case Intrinsic::amdgcn_s_sendmsg:
2178  case Intrinsic::amdgcn_s_sendmsghalt: {
2179  // This must be an SGPR, but accept a VGPR.
2180  unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2181  AMDGPU::SGPRRegBankID);
2182  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
2183  break;
2184  }
2185  case Intrinsic::amdgcn_end_cf: {
2186  unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
2187  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2188  break;
2189  }
2190  }
2191  break;
2192  }
2193  case AMDGPU::G_SELECT: {
2194  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2195  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2196  AMDGPU::SGPRRegBankID);
2197  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
2198  AMDGPU::SGPRRegBankID);
2199  bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
2200  Op3Bank == AMDGPU::SGPRRegBankID;
2201 
2202  unsigned CondBankDefault = SGPRSrcs ?
2203  AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
2204  unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
2205  CondBankDefault);
2206  if (CondBank == AMDGPU::SGPRRegBankID)
2207  CondBank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
2208  else if (CondBank == AMDGPU::VGPRRegBankID)
2209  CondBank = AMDGPU::VCCRegBankID;
2210 
2211  unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SCCRegBankID ?
2212  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
2213 
2214  assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
2215 
2216  if (Size == 64) {
2217  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2218  OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2219  OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2220  OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2221  } else {
2222  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
2223  OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2224  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
2225  OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
2226  }
2227 
2228  break;
2229  }
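 // Worked example for G_SELECT (hypothetical): with both value inputs in
 // SGPRs and an undecided condition, CondBank resolves to SCC and the whole
 // select stays scalar (an S_CSELECT-style selection); any VGPR value input
 // demotes the condition to VCC and the values to VGPRs (V_CNDMASK-style).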
2230 
2231  case AMDGPU::G_LOAD:
2232  return getInstrMappingForLoad(MI);
2233 
2234  case AMDGPU::G_ATOMICRMW_XCHG:
2235  case AMDGPU::G_ATOMICRMW_ADD:
2236  case AMDGPU::G_ATOMICRMW_SUB:
2237  case AMDGPU::G_ATOMICRMW_AND:
2238  case AMDGPU::G_ATOMICRMW_OR:
2239  case AMDGPU::G_ATOMICRMW_XOR:
2240  case AMDGPU::G_ATOMICRMW_MAX:
2241  case AMDGPU::G_ATOMICRMW_MIN:
2242  case AMDGPU::G_ATOMICRMW_UMAX:
2243  case AMDGPU::G_ATOMICRMW_UMIN:
2244  case AMDGPU::G_ATOMICRMW_FADD:
2245  case AMDGPU::G_ATOMIC_CMPXCHG: {
2246  return getDefaultMappingAllVGPR(MI);
2247  }
2248  case AMDGPU::G_BRCOND: {
2249  unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
2250  AMDGPU::SGPRRegBankID);
2251  assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
2252  if (Bank != AMDGPU::SCCRegBankID)
2253  Bank = AMDGPU::VCCRegBankID;
2254 
2255  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
2256  break;
2257  }
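 // A conditional branch consumes either SCC (uniform, S_CBRANCH_SCC*) or
 // VCC (divergent, S_CBRANCH_VCC*); any other bank on the s1 condition is
 // forced to VCC so the required copy can be inserted later.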
2258  }
2259 
2260  return getInstructionMapping(/*ID*/1, /*Cost*/1,
2261  getOperandsMapping(OpdsMapping),
2262  MI.getNumOperands());
2263 }
2264 