//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"

#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AMDGPUGenRegisterBankInfo.def"

using namespace llvm;
using namespace MIPatternMatch;

namespace {

// Observer to apply a register bank to new registers created by
// LegalizerHelper.
class ApplyRegBankMapping final : public GISelChangeObserver {
private:
  MachineRegisterInfo &MRI;
  const RegisterBank *NewBank;
  SmallVector<MachineInstr *, 4> NewInsts;

public:
  ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
    : MRI(MRI_), NewBank(RB) {}

  ~ApplyRegBankMapping() {
    for (MachineInstr *MI : NewInsts)
      applyBank(*MI);
  }

  /// Set any registers that don't have a set register class or bank to SALU.
  void applyBank(MachineInstr &MI) {
    for (MachineOperand &Op : MI.operands()) {
      if (!Op.isReg())
        continue;

      Register Reg = Op.getReg();
      if (MRI.getRegClassOrRegBank(Reg))
        continue;

      const RegisterBank *RB = NewBank;
      // FIXME: This might not be enough to detect when SCC should be used.
      if (MRI.getType(Reg) == LLT::scalar(1))
        RB = (NewBank == &AMDGPU::SGPRRegBank ?
              &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);

      MRI.setRegBank(Reg, *RB);
    }
  }

  void erasingInstr(MachineInstr &MI) override {}

  void createdInstr(MachineInstr &MI) override {
    // At this point, the instruction was just inserted and has no operands.
    NewInsts.push_back(&MI);
  }

  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {}
};

}

AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
    : AMDGPUGenRegisterBankInfo(),
      Subtarget(ST),
      TRI(Subtarget.getRegisterInfo()),
      TII(Subtarget.getInstrInfo()) {

  // HACK: Until this is fully tablegen'd.
  static bool AlreadyInit = false;
  if (AlreadyInit)
    return;

  AlreadyInit = true;

  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
  (void)RBSGPR;
  assert(&RBSGPR == &AMDGPU::SGPRRegBank);

  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
  (void)RBVGPR;
  assert(&RBVGPR == &AMDGPU::VGPRRegBank);
}

unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                          const RegisterBank &Src,
                                          unsigned Size) const {
  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      Src.getID() == AMDGPU::VGPRRegBankID) {
    return std::numeric_limits<unsigned>::max();
  }

  // Bool values are tricky, because the meaning is based on context. The SCC
  // and VCC banks are for the natural scalar and vector conditions produced by
  // a compare.
  //
  // Legalization doesn't know about the necessary context, so an s1 use may
  // have been a truncate from an arbitrary value, in which case a copy (lowered
  // as a compare with 0) needs to be inserted.
  if (Size == 1 &&
      (Dst.getID() == AMDGPU::SCCRegBankID ||
       Dst.getID() == AMDGPU::SGPRRegBankID) &&
      (Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  if (Dst.getID() == AMDGPU::SCCRegBankID &&
      Src.getID() == AMDGPU::VCCRegBankID)
    return std::numeric_limits<unsigned>::max();

  return RegisterBankInfo::copyCost(Dst, Src, Size);
}
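
// Example (illustrative, not taken from the mapping tables): the unbounded
// cost means RegBankSelect will never insert a plain VGPR->SGPR copy to
// satisfy a mapping, since copying a potentially divergent value into a
// scalar register has no direct encoding:
//
//   %1:sgpr(s32) = COPY %0:vgpr(s32)  ; cost = max, this mapping is avoided
//   %1:vgpr(s32) = COPY %0:sgpr(s32)  ; fine, falls through to the default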

unsigned AMDGPURegisterBankInfo::getBreakDownCost(
  const ValueMapping &ValMapping,
  const RegisterBank *CurBank) const {
  // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
  // VGPR.
  // FIXME: Is there a better way to do this?
  if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown[0].Length >= 64)
    return 10; // This is expensive.

  assert(ValMapping.NumBreakDowns == 2 &&
         ValMapping.BreakDown[0].Length == 32 &&
         ValMapping.BreakDown[0].StartIdx == 0 &&
         ValMapping.BreakDown[1].Length == 32 &&
         ValMapping.BreakDown[1].StartIdx == 32 &&
         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);

  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
  // want.

  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
  // alignment restrictions, but this probably isn't important.
  return 1;
}

const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
    const TargetRegisterClass &RC) const {
  if (&RC == &AMDGPU::SReg_1RegClass)
    return AMDGPU::VCCRegBank;

  return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank;
}

template <unsigned NumOps>
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::addMappingFromTable(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const std::array<unsigned, NumOps> RegSrcOpIdx,
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {

  InstructionMappings AltMappings;

  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());

  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
  }

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
  }

  // getInstrMapping's default mapping uses ID 1, so start at 2.
  unsigned MappingID = 2;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
    }

    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
                                                 getOperandsMapping(Operands),
                                                 Operands.size()));
  }

  return AltMappings;
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_readlane: {
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Need a readfirstlane for the index.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_writelane: {
    static const OpRegBankEntry<4> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need readfirstlane of first op.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of second op.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of both ops.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {

  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_buffer_load: {
    static const OpRegBankEntry<3> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Waterfall loop needed for rsrc. In the worst case this will execute
      // approximately an extra 10 * wavesize + 2 instructions.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
    };

    // rsrc, voffset, offset
    const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_buffer_load: {
    static const OpRegBankEntry<2> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Only need 1 register in loop.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

      // Have to waterfall the resource.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

      // Have to waterfall the resource, and the offset.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
    };

    // rsrc, offset
    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
    // VGPR = M0, VGPR
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need a readfirstlane for m0.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_sendmsg:
  case Intrinsic::amdgcn_s_sendmsghalt: {
    // FIXME: Should have no register for immediate.
    static const OpRegBankEntry<2> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Need readlane.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 2> RegSrcOpIdx = { { 1, 2 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 &&
         AMDGPUInstrInfo::isUniformMMO(MMO);
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  InstructionMappings AltMappings;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_GLOBAL_VALUE: {
    static const OpRegBankEntry<1> Table[2] = {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 }
    };

    return addMappingFromTable<1>(MI, MRI, { 0 }, Table);
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    if (Size == 1) {
      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
      const InstructionMapping &SCCMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SCCMapping);

      const InstructionMapping &SGPRMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SGPRMapping);

      const InstructionMapping &VCCMapping0 = getInstructionMapping(
        2, 10, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&VCCMapping0);
      return AltMappings;
    }

    if (Size != 64)
      break;

    const InstructionMapping &SSMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 2, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VVMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(
      3, 3, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SVMapping);

    // SGPR in LHS is slightly preferable, so make VS more expensive than SV.
    const InstructionMapping &VSMapping = getInstructionMapping(
      3, 4, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VSMapping);
    break;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned PtrSize = PtrTy.getSizeInBits();
    unsigned AS = PtrTy.getAddressSpace();
    LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
    if (isInstrUniformNonExtLoadAlign4(MI) &&
        (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
      const InstructionMapping &SSMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
        2); // Num Operands
      AltMappings.push_back(&SSMapping);
    }

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 1, getOperandsMapping(
        {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
         AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
      2); // Num Operands
    AltMappings.push_back(&VVMapping);

    // It may be possible to have a vgpr = load sgpr mapping here, because
    // the mubuf instructions support this kind of load, but probably for only
    // gfx7 and older. However, the addressing mode matching in the instruction
    // selector should be able to do a better job of detecting and selecting
    // these kinds of loads from the vgpr = load vgpr mapping.

    return AltMappings;
  }
  case TargetOpcode::G_ICMP: {
    unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SVMapping);

    const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SELECT: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    static const OpRegBankEntry<3> Table[4] = {
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Scalar requires cmp+select, and extends if 16-bit.
      // FIXME: Should there be separate costs for 32 and 16-bit?
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&VVMapping);
    return AltMappings;
  }
  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

    const InstructionMapping &SMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&SMapping);

    const InstructionMapping &VMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&VMapping);
    return AltMappings;
  }
  case AMDGPU::G_INTRINSIC:
    return getInstrAlternativeMappingsIntrinsic(MI, MRI);
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AMDGPURegisterBankInfo::split64BitValueForMapping(
  MachineIRBuilder &B,
  SmallVector<Register, 2> &Regs,
  LLT HalfTy,
  Register Reg) const {
  assert(HalfTy.getSizeInBits() == 32);
  MachineRegisterInfo *MRI = B.getMRI();
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);

  Regs.push_back(LoLHS);
  Regs.push_back(HiLHS);

  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
    .addDef(LoLHS)
    .addDef(HiLHS)
    .addUse(Reg);
}
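
// For example (illustrative register names), splitting a 64-bit value gives:
//
//   %lo:bank(s32), %hi:bank(s32) = G_UNMERGE_VALUES %reg:bank(s64)
//
// with %lo and %hi appended to Regs, inheriting the bank of \p Reg.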

/// Replace the current type each register in \p Regs has with \p NewTy.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
                          LLT NewTy) {
  for (Register Reg : Regs) {
    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
    MRI.setType(Reg, NewTy);
  }
}

static LLT getHalfSizedType(LLT Ty) {
  if (Ty.isVector()) {
    assert(Ty.getNumElements() % 2 == 0);
    return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
  }

  assert(Ty.getSizeInBits() % 2 == 0);
  return LLT::scalar(Ty.getSizeInBits() / 2);
}
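
// Examples: s64 -> s32, <4 x s32> -> <2 x s32>, and <2 x s64> -> s64, since
// scalarOrVector folds a one-element vector back into a scalar.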

/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs.
/// If any of the required SGPR operands are VGPRs, perform a waterfall loop to
/// execute the instruction for each unique combination of values in all lanes
/// in the wave. The block will be split such that the rest of the instructions
/// are moved to a new block.
///
/// Essentially performs this loop:
///
/// Save Execution Mask
/// For (Lane : Wavefront) {
///   Enable Lane, Disable all other lanes
///   SGPR = read SGPR value for current lane from VGPR
///   VGPRResult[Lane] = use_op SGPR
/// }
/// Restore Execution Mask
///
/// In practice the generated loop also compares every lane's value against
/// the one just read, so all lanes that share a value are handled in a single
/// iteration rather than one iteration per lane.
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineIRBuilder &B,
  MachineInstr &MI,
  MachineRegisterInfo &MRI,
  ArrayRef<unsigned> OpIndices) const {
  MachineFunction *MF = MI.getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock::iterator I(MI);

  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Use a set to avoid extra readfirstlanes in the case where multiple
  // operands are the same register.
  SmallSet<Register, 4> SGPROperandRegs;
  for (unsigned Op : OpIndices) {
    assert(MI.getOperand(Op).isUse());
    Register Reg = MI.getOperand(Op).getReg();
    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
    if (OpBank->getID() == AMDGPU::VGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  if (SGPROperandRegs.empty())
    return false;

  SmallVector<Register, 4> ResultRegs;
  SmallVector<Register, 4> InitResultRegs;
  SmallVector<Register, 4> PhiRegs;
  for (MachineOperand &Def : MI.defs()) {
    LLT ResTy = MRI.getType(Def.getReg());
    const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
    ResultRegs.push_back(Def.getReg());
    Register InitReg = B.buildUndef(ResTy).getReg(0);
    Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
    InitResultRegs.push_back(InitReg);
    PhiRegs.push_back(PhiReg);
    MRI.setRegBank(PhiReg, *DefBank);
    MRI.setRegBank(InitReg, *DefBank);
  }

  Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  // To insert the loop we need to split the block. Move everything before this
  // point to a new block, and insert a new empty block before this instruction.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, RestoreExecBB);
  MF->insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(RestoreExecBB);
  LoopBB->addSuccessor(LoopBB);

  // Move the rest of the block into a new block.
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());

  MBB.addSuccessor(LoopBB);
  RestoreExecBB->addSuccessor(RemainderBB);

  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
    .addDef(PhiExec)
    .addReg(InitSaveExecReg)
    .addMBB(&MBB)
    .addReg(NewExec)
    .addMBB(LoopBB);

  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
    B.buildInstr(TargetOpcode::G_PHI)
      .addDef(std::get<2>(Result))
      .addReg(std::get<0>(Result)) // Initial value / implicit_def
      .addMBB(&MBB)
      .addReg(std::get<1>(Result)) // Mid-loop value.
      .addMBB(LoopBB);
  }

  // Move the instruction into the loop.
  LoopBB->splice(LoopBB->end(), &MBB, I);
  I = std::prev(LoopBB->end());

  B.setInstr(*I);

  Register CondReg;

  for (MachineOperand &Op : MI.uses()) {
    if (!Op.isReg())
      continue;

    assert(!Op.isDef());
    if (SGPROperandRegs.count(Op.getReg())) {
      LLT OpTy = MRI.getType(Op.getReg());
      unsigned OpSize = OpTy.getSizeInBits();

      // Can only do a readlane of 32-bit pieces.
      if (OpSize == 32) {
        // Avoid extra copies in the simple case of one 32-bit register.
        Register CurrentLaneOpReg
          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        MRI.setType(CurrentLaneOpReg, OpTy);

        constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
        // Read the next variant <- also loop target.
        BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                CurrentLaneOpReg)
          .addReg(Op.getReg());

        Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
        bool First = CondReg == AMDGPU::NoRegister;
        if (First)
          CondReg = NewCondReg;

        // Compare the value just read against the value in every lane.
        B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
          .addDef(NewCondReg)
          .addReg(CurrentLaneOpReg)
          .addReg(Op.getReg());
        Op.setReg(CurrentLaneOpReg);

        if (!First) {
          Register AndReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

          // If there are multiple operands to consider, AND the conditions
          // together.
          B.buildInstr(AMDGPU::S_AND_B64)
            .addDef(AndReg)
            .addReg(NewCondReg)
            .addReg(CondReg);
          CondReg = AndReg;
        }
      } else {
        LLT S32 = LLT::scalar(32);
        SmallVector<Register, 8> ReadlanePieces;

        // The compares can be done as 64-bit, but the extract needs to be done
        // in 32-bit pieces.

        bool Is64 = OpSize % 64 == 0;

        LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
        unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
                                          : AMDGPU::V_CMP_EQ_U32_e64;

        // Insert the unmerge before the loop.
        B.setMBB(MBB);
        auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
        B.setInstr(*I);

        unsigned NumPieces = Unmerge->getNumOperands() - 1;
        for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
          Register UnmergePiece = Unmerge.getReg(PieceIdx);

          Register CurrentLaneOpReg;
          if (Is64) {
            Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
            Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);

            MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
            MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
            MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegLo)
              .addReg(UnmergePiece, 0, AMDGPU::sub0);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegHi)
              .addReg(UnmergePiece, 0, AMDGPU::sub1);

            CurrentLaneOpReg =
              B.buildMerge(LLT::scalar(64),
                           {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
                .getReg(0);

            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);

            if (OpTy.getScalarSizeInBits() == 64) {
              // We need to produce a 64-bit element vector, so use the
              // merged pieces.
              ReadlanePieces.push_back(CurrentLaneOpReg);
            } else {
              // 32-bit element type.
              ReadlanePieces.push_back(CurrentLaneOpRegLo);
              ReadlanePieces.push_back(CurrentLaneOpRegHi);
            }
          } else {
            CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
            MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpReg)
              .addReg(UnmergePiece);
            ReadlanePieces.push_back(CurrentLaneOpReg);
          }

          Register NewCondReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
          bool First = CondReg == AMDGPU::NoRegister;
          if (First)
            CondReg = NewCondReg;

          B.buildInstr(CmpOp)
            .addDef(NewCondReg)
            .addReg(CurrentLaneOpReg)
            .addReg(UnmergePiece);

          if (!First) {
            Register AndReg
              = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

            // If there are multiple operands to consider, AND the conditions
            // together.
            B.buildInstr(AMDGPU::S_AND_B64)
              .addDef(AndReg)
              .addReg(NewCondReg)
              .addReg(CondReg);
            CondReg = AndReg;
          }
        }

        // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
        // BUILD_VECTOR.
        if (OpTy.isVector()) {
          auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        } else {
          auto Merge = B.buildMerge(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        }

        MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
      }
    }
  }

  B.setInsertPt(*LoopBB, LoopBB->end());

  // Update EXEC, save the original EXEC value to VCC.
  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
    .addDef(NewExec)
    .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(AMDGPU::S_XOR_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
    .addReg(NewExec);

  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
  // s_cbranch_scc0?

  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
    .addMBB(LoopBB);

  // Save the EXEC mask before the loop.
  BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
    .addReg(AMDGPU::EXEC);

  // Restore the EXEC mask after the loop.
  B.setMBB(*RestoreExecBB);
  B.buildInstr(AMDGPU::S_MOV_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(SaveExecReg);

  // Restore the insert point before the original instruction.
  B.setInsertPt(MBB, MBB.end());

  return true;
}
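
// Illustrative sketch of the structure this builds (PHIs omitted, register
// names invented for the example):
//
//   MBB:                                      ; original block, now ends with
//     %SaveExec = S_MOV_B64_term $exec        ; saving the exec mask
//   LoopBB:
//     %s = V_READFIRSTLANE_B32 %v             ; per SGPR-required operand
//     %c = V_CMP_EQ_U32_e64 %s, %v            ; lanes holding this value
//     ... MI, rewritten to use %s ...
//     %NewExec = S_AND_SAVEEXEC_B64 %c
//     $exec = S_XOR_B64_term $exec, %NewExec  ; drop the lanes just handled
//     S_CBRANCH_EXECNZ %LoopBB
//   RestoreExecBB:
//     $exec = S_MOV_B64_term %SaveExec
//   RemainderBB:                              ; rest of the original block
//
// A lane's final execution of MI is the iteration in which the value read
// happened to be its own, and later iterations leave it inactive, so every
// lane ends up with a correct result.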

bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  ArrayRef<unsigned> OpIndices) const {
  MachineIRBuilder B(MI);
  return executeInWaterfallLoop(B, MI, MRI, OpIndices);
}

// Legalize an operand that must be an SGPR by inserting a readfirstlane.
void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
    MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
  Register Reg = MI.getOperand(OpIdx).getReg();
  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
  if (Bank != &AMDGPU::VGPRRegBank)
    return;

  MachineIRBuilder B(MI);
  Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
    .addDef(SGPR)
    .addReg(Reg);

  const TargetRegisterClass *Constrained =
      constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
  (void)Constrained;
  assert(Constrained && "Failed to constrain readfirstlane src reg");

  MI.getOperand(OpIdx).setReg(SGPR);
}

// When regbankselect repairs registers, it will insert a repair instruction
// which defines the repaired register. Then it calls applyMapping and expects
// that the targets will either delete or rewrite the instructions that
// originally wrote to the repaired registers. Because of this, we end up in a
// situation where we have 2 instructions defining the same registers.
static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
                                     Register Reg,
                                     const MachineInstr &MI) {
  // Is there some way we can assert that there are exactly 2 def instructions?
  for (MachineInstr &Other : MRI.def_instructions(Reg)) {
    if (&Other != &MI)
      return &Other;
  }

  return nullptr;
}

bool AMDGPURegisterBankInfo::applyMappingWideLoad(
    MachineInstr &MI,
    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
    MachineRegisterInfo &MRI) const {
  Register DstReg = MI.getOperand(0).getReg();
  const LLT LoadTy = MRI.getType(DstReg);
  unsigned LoadSize = LoadTy.getSizeInBits();
  const unsigned MaxNonSmrdLoadSize = 128;
  // 128-bit loads are supported for all instruction types.
  if (LoadSize <= MaxNonSmrdLoadSize)
    return false;

  SmallVector<unsigned, 16> DefRegs(OpdMapper.getVRegs(0));
  SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));

  // If the pointer is an SGPR, we have nothing to do.
  if (SrcRegs.empty())
    return false;

  assert(LoadSize % MaxNonSmrdLoadSize == 0);

  // We want to get the repair instruction now, because it will help us
  // determine which instruction the legalizer inserts that will also
  // write to DstReg.
  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);

  // RegBankSelect only emits scalar types, so we need to reset the pointer
  // operand to a pointer type.
  Register BasePtrReg = SrcRegs[0];
  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);

  MachineIRBuilder B(MI);

  unsigned SplitElts =
      MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
  const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType());
  ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&O);
  B.setChangeObserver(Observer);
  LegalizerHelper Helper(B.getMF(), Observer, B);
  if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) !=
      LegalizerHelper::Legalized)
    return false;

  // At this point, the legalizer has split the original load into smaller
  // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
  // that combines the outputs of the lower loads and writes it to DstReg.
  // The register bank selector has also added the RepairInst which writes to
  // DstReg as well.
  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);

  // Replace the output of the LegalizedInst with a temporary register, since
  // RepairInst already defines DstReg.
  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
  LegalizedInst->getOperand(0).setReg(TmpReg);
  B.setInsertPt(*RepairInst->getParent(), RepairInst);

  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
    Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
    B.buildConstant(IdxReg, DefIdx);
    MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
  }

  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
  return true;
}
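
// For example (illustrative): a 256-bit G_LOAD of <8 x s32> through a VGPR
// pointer exceeds MaxNonSmrdLoadSize, so fewerElementsVector splits it into
// two <4 x s32> loads, and the pieces of the original wide def are then
// refilled with G_EXTRACT_VECTOR_ELT from the combined result.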

bool AMDGPURegisterBankInfo::applyMappingImage(
    MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
    MachineRegisterInfo &MRI, int RsrcIdx) const {
  const int NumDefs = MI.getNumExplicitDefs();

  // The reported argument index is relative to the IR intrinsic call arguments,
  // so we need to shift by the number of defs and the intrinsic ID.
  RsrcIdx += NumDefs + 1;

  // Insert copies to VGPR arguments.
  applyDefaultMapping(OpdMapper);

  // Fixup any SGPR arguments.
  SmallVector<unsigned, 4> SGPRIndexes;
  for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())
      continue;

    // If this intrinsic has a sampler, it immediately follows rsrc.
    if (I == RsrcIdx || I == RsrcIdx + 1)
      SGPRIndexes.push_back(I);
  }

  executeInWaterfallLoop(MI, MRI, SGPRIndexes);
  return true;
}

// For cases where only a single copy is inserted for matching register banks,
// replace the register in the instruction operand.
static void substituteSimpleCopyRegs(
  const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
  SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
  if (!SrcReg.empty()) {
    assert(SrcReg.size() == 1);
    OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
  }
}

/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
                                                MachineRegisterInfo &MRI,
                                                Register Reg) const {
  if (!Subtarget.hasUnpackedD16VMem())
    return Reg;

  const LLT S16 = LLT::scalar(16);
  LLT StoreVT = MRI.getType(Reg);
  if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
    return Reg;

  auto Unmerge = B.buildUnmerge(S16, Reg);

  SmallVector<Register, 4> WideRegs;
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    WideRegs.push_back(Unmerge.getReg(I));

  const LLT S32 = LLT::scalar(32);
  int NumElts = StoreVT.getNumElements();

  return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0);
}
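
// For example (illustrative): on a subtarget with unpacked D16 VMEM
// instructions, a <4 x s16> store value is unmerged into four s16 pieces and
// rebuilt as a <4 x s32> value, one 16-bit element per 32-bit register.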

static std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
  int64_t Const;
  if (mi_match(Reg, MRI, m_ICst(Const)))
    return std::make_pair(Register(), Const);

  Register Base;
  if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
    return std::make_pair(Base, Const);

  // TODO: Handle G_OR used for add case.
  return std::make_pair(Reg, 0);
}
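
// Examples (illustrative):
//   %r = G_CONSTANT i32 16                  --> {Register(), 16}
//   %r = G_ADD %base, (G_CONSTANT i32 16)   --> {%base, 16}
//   anything else                           --> {%r, 0}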

std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
                                           Register OrigOffset) const {
  const unsigned MaxImm = 4095;
  Register BaseReg;
  unsigned ImmOffset;
  const LLT S32 = LLT::scalar(32);

  std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
                                                           OrigOffset);

  unsigned C1 = 0;
  if (ImmOffset != 0) {
    // If the immediate value is too big for the immoffset field, put the value
    // and -4096 into the immoffset field so that the value that is copied/added
    // for the voffset field is a multiple of 4096, and it stands more chance
    // of being CSEd with the copy/add for another similar load/store.
    // However, do not do that rounding down to a multiple of 4096 if that is a
    // negative number, as it appears to be illegal to have a negative offset
    // in the vgpr, even if adding the immediate offset makes it positive.
    unsigned Overflow = ImmOffset & ~MaxImm;
    ImmOffset -= Overflow;
    if ((int32_t)Overflow < 0) {
      Overflow += ImmOffset;
      ImmOffset = 0;
    }

    C1 = ImmOffset;
    if (Overflow != 0) {
      if (!BaseReg)
        BaseReg = B.buildConstant(S32, Overflow).getReg(0);
      else {
        auto OverflowVal = B.buildConstant(S32, Overflow);
        BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
      }
    }
  }

  if (!BaseReg)
    BaseReg = B.buildConstant(S32, 0).getReg(0);

  return {BaseReg, C1};
}
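
// Worked example (illustrative): an offset of 4104 does not fit the MaxImm
// field. Overflow = 4104 & ~4095 = 4096, so the immediate becomes 8 and a
// constant 4096 is materialized (or added to the base) for the voffset,
// keeping the voffset a multiple of 4096 to improve CSE.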

static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
  int64_t C;
  return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
}

static unsigned extractGLC(unsigned CachePolicy) {
  return CachePolicy & 1;
}

static unsigned extractSLC(unsigned CachePolicy) {
  return (CachePolicy >> 1) & 1;
}

static unsigned extractDLC(unsigned CachePolicy) {
  return (CachePolicy >> 2) & 1;
}
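
// The cache policy operand packs bits as glc | (slc << 1) | (dlc << 2);
// e.g. CachePolicy = 5 (0b101) yields glc = 1, slc = 0, dlc = 1.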

MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
                                             MachineInstr &MI) const {
  MachineRegisterInfo &MRI = *B.getMRI();
  executeInWaterfallLoop(B, MI, MRI, {2, 4});

  // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.

  Register VData = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(VData);

  int EltSize = Ty.getScalarSizeInBits();
  int Size = Ty.getSizeInBits();

  // FIXME: Broken integer truncstore.
  if (EltSize != 32)
    report_fatal_error("unhandled intrinsic store");

  // FIXME: Verifier should enforce 1 MMO for these intrinsics.
  const int MemSize = (*MI.memoperands_begin())->getSize();

  Register RSrc = MI.getOperand(2).getReg();
  Register VOffset = MI.getOperand(3).getReg();
  Register SOffset = MI.getOperand(4).getReg();
  unsigned CachePolicy = MI.getOperand(5).getImm();

  unsigned ImmOffset;
  std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);

  const bool Offen = !isZero(VOffset, MRI);

  unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
  switch (8 * MemSize) {
  case 8:
    Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
                  AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
    break;
  case 16:
    Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
                  AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
    break;
  default:
    Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
                  AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
    if (Size > 32)
      Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
    break;
  }

  // Set the insertion point back to the instruction in case it was moved into
  // a loop.
  B.setInstr(MI);

  MachineInstrBuilder MIB = B.buildInstr(Opc)
    .addUse(VData);

  if (Offen)
    MIB.addUse(VOffset);

  MIB.addUse(RSrc)
     .addUse(SOffset)
     .addImm(ImmOffset)
     .addImm(extractGLC(CachePolicy))
     .addImm(extractSLC(CachePolicy))
     .addImm(0) // tfe: FIXME: Remove from inst
     .addImm(extractDLC(CachePolicy))
     .cloneMemRefs(MI);

  // FIXME: We need a way to report failure from applyMappingImpl.
  // Insert constrain copies before inserting the loop.
  if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
    report_fatal_error("failed to constrain selected store intrinsic");

  return MIB;
}

void AMDGPURegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  unsigned Opc = MI.getOpcode();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  switch (Opc) {
  case AMDGPU::G_SELECT: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
    SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src1Regs.empty() && Src2Regs.empty());
      break;
    }

    MachineIRBuilder B(MI);
    if (Src0Regs.empty())
      Src0Regs.push_back(MI.getOperand(1).getReg());
    else {
      assert(Src0Regs.size() == 1);
    }

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else {
      setRegsToType(MRI, Src1Regs, HalfTy);
    }

    if (Src2Regs.empty())
      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
    else
      setRegsToType(MRI, Src2Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
    B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
    // there is a VGPR input.
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);
    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src0Regs.empty() && Src1Regs.empty());
      break;
    }

    assert(DefRegs.size() == 2);
    assert(Src0Regs.size() == Src1Regs.size() &&
           (Src0Regs.empty() || Src0Regs.size() == 2));

    // Depending on where the source registers came from, the generic code may
    // have decided to split the inputs already or not. If not, we still need to
    // extract the values.
    MachineIRBuilder B(MI);

    if (Src0Regs.empty())
      split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
    else
      setRegsToType(MRI, Src0Regs, HalfTy);

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else
      setRegsToType(MRI, Src1Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildInstr(Opc)
      .addDef(DefRegs[0])
      .addUse(Src0Regs[0])
      .addUse(Src1Regs[0]);

    B.buildInstr(Opc)
      .addDef(DefRegs[1])
      .addUse(Src0Regs[1])
      .addUse(Src1Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_ADD:
  case AMDGPU::G_SUB:
  case AMDGPU::G_MUL: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::scalar(16))
      break;

    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
        LegalizerHelper::Legalized)
      llvm_unreachable("widen scalar should have succeeded");
    return;
  }
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    // Turn scalar min/max into a compare and select.
    LLT Ty = MRI.getType(DstReg);
    LLT S32 = LLT::scalar(32);
    LLT S16 = LLT::scalar(16);

    if (Ty == S16) {
      // Need to widen to s32, and expand as cmp + select.
      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("widenScalar should have succeeded");

      // FIXME: This is relying on widenScalar leaving MI in place.
      if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    } else {
      if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    }

    return;
  }
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    bool Signed = Opc == AMDGPU::G_SEXT;

    MachineIRBuilder B(MI);
    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isScalar() &&
        SrcBank != &AMDGPU::SGPRRegBank &&
        SrcBank != &AMDGPU::SCCRegBank &&
        SrcBank != &AMDGPU::VCCRegBank &&
        // FIXME: Should handle any type that rounds to s64 when irregular
        // breakdowns are supported.
        DstTy.getSizeInBits() == 64 &&
        SrcTy.getSizeInBits() <= 32) {
      const LLT S32 = LLT::scalar(32);
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      // Extend to 32-bit, and then extend the low half.
      if (Signed) {
        // TODO: Should really be buildSExtOrCopy.
        B.buildSExtOrTrunc(DefRegs[0], SrcReg);

        // Replicate sign bit from 32-bit extended part.
        auto ShiftAmt = B.buildConstant(S32, 31);
        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
      } else {
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
        B.buildConstant(DefRegs[1], 0);
      }

      MRI.setRegBank(DstReg, *SrcBank);
      MI.eraseFromParent();
      return;
    }

    if (SrcTy != LLT::scalar(1))
      return;

    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;

      unsigned DstSize = DstTy.getSizeInBits();
      // 64-bit select is SGPR only.
      const bool UseSel64 = DstSize > 32 &&
        SrcBank->getID() == AMDGPU::SCCRegBankID;

      // TODO: Should s16 select be legal?
      LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

      if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
        B.buildSelect(DefRegs[0], SrcReg, True, False);
        B.buildCopy(DefRegs[1], DefRegs[0]);
      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
      } else {
        B.buildSelect(DstReg, SrcReg, True, False);
      }

      MI.eraseFromParent();
      return;
    }

    // Fixup the case with an s1 src that isn't a condition register. Use
    // shifts instead of introducing a compare to avoid an unnecessary
    // condition register (and since there are no scalar 16-bit compares).
    auto Ext = B.buildAnyExt(DstTy, SrcReg);
    auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
    auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);

    if (MI.getOpcode() == AMDGPU::G_SEXT)
      B.buildAShr(DstReg, Shl, ShiftAmt);
    else
      B.buildLShr(DstReg, Shl, ShiftAmt);

    MRI.setRegBank(DstReg, *SrcBank);
    MRI.setRegBank(Ext.getReg(0), *SrcBank);
    MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
    MRI.setRegBank(Shl.getReg(0), *SrcBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::vector(2, 16))
      break;

    assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0)));
    substituteSimpleCopyRegs(OpdMapper, 1);
    substituteSimpleCopyRegs(OpdMapper, 2);

    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::SGPRRegBank)
      break; // Can use S_PACK_* instructions.

    MachineIRBuilder B(MI);

    Register Lo = MI.getOperand(1).getReg();
    Register Hi = MI.getOperand(2).getReg();
    const LLT S32 = LLT::scalar(32);

    const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
    const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);

    Register ZextLo;
    Register ShiftHi;

    if (Opc == AMDGPU::G_BUILD_VECTOR) {
      ZextLo = B.buildZExt(S32, Lo).getReg(0);
      MRI.setRegBank(ZextLo, *BankLo);

      Register ZextHi = B.buildZExt(S32, Hi).getReg(0);
      MRI.setRegBank(ZextHi, *BankHi);

      auto ShiftAmt = B.buildConstant(S32, 16);
      MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);

      ShiftHi = B.buildShl(S32, ZextHi, ShiftAmt).getReg(0);
      MRI.setRegBank(ShiftHi, *BankHi);
    } else {
      Register MaskLo = B.buildConstant(S32, 0xffff).getReg(0);
      MRI.setRegBank(MaskLo, *BankLo);

      auto ShiftAmt = B.buildConstant(S32, 16);
      MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);

      ShiftHi = B.buildShl(S32, Hi, ShiftAmt).getReg(0);
      MRI.setRegBank(ShiftHi, *BankHi);

      ZextLo = B.buildAnd(S32, Lo, MaskLo).getReg(0);
      MRI.setRegBank(ZextLo, *BankLo);
    }

    auto Or = B.buildOr(S32, ZextLo, ShiftHi);
    MRI.setRegBank(Or.getReg(0), *DstBank);

    B.buildBitcast(DstReg, Or);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT:
    applyDefaultMapping(OpdMapper);
    executeInWaterfallLoop(MI, MRI, { 2 });
    return;
  case AMDGPU::G_INTRINSIC: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_s_buffer_load: {
      // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
      executeInWaterfallLoop(MI, MRI, { 2, 3 });
      return;
    }
    case Intrinsic::amdgcn_readlane: {
      substituteSimpleCopyRegs(OpdMapper, 2);

      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(3)));

      // Make sure the index is an SGPR. It doesn't make sense to run this in a
      // waterfall loop, so assume it's a uniform value.
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    case Intrinsic::amdgcn_writelane: {
      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(2)));
      assert(empty(OpdMapper.getVRegs(3)));

      substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
      constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    auto IntrID = MI.getIntrinsicID();
    switch (IntrID) {
    case Intrinsic::amdgcn_buffer_load: {
      executeInWaterfallLoop(MI, MRI, { 2 });
      return;
    }
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      // This is only allowed to execute with 1 lane, so readfirstlane is safe.
      assert(empty(OpdMapper.getVRegs(0)));
      substituteSimpleCopyRegs(OpdMapper, 3);
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // FIXME: Should this use a waterfall loop?
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_buffer_load_format:
    case Intrinsic::amdgcn_raw_tbuffer_load:
    case Intrinsic::amdgcn_raw_buffer_store:
    case Intrinsic::amdgcn_raw_buffer_store_format:
    case Intrinsic::amdgcn_raw_tbuffer_store: {
      applyDefaultMapping(OpdMapper);
      executeInWaterfallLoop(MI, MRI, {2, 4});
      return;
    }
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_buffer_store:
    case Intrinsic::amdgcn_struct_tbuffer_load:
    case Intrinsic::amdgcn_struct_tbuffer_store: {
      applyDefaultMapping(OpdMapper);
      executeInWaterfallLoop(MI, MRI, {2, 5});
      return;
    }
    default: {
      if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
              AMDGPU::lookupRsrcIntrinsic(IntrID)) {
        // Non-images can have complications from operands that allow both SGPR
        // and VGPR. For now it's too complicated to figure out the final opcode
        // to derive the register bank from the MCInstrDesc.
        if (RSrcIntrin->IsImage) {
          applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
          return;
        }
      }

      break;
    }
    }
    break;
  }
  case AMDGPU::G_LOAD:
  case AMDGPU::G_ZEXTLOAD:
  case AMDGPU::G_SEXTLOAD: {
    if (applyMappingWideLoad(MI, OpdMapper, MRI))
      return;
    break;
  }
  default:
    break;
  }

  return applyDefaultMapping(OpdMapper);
}
1698 
1699 bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
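  // An instruction can be given a pure scalar mapping only if none of its
  // register operands already live in the VGPR bank.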
1700  const MachineFunction &MF = *MI.getParent()->getParent();
1701  const MachineRegisterInfo &MRI = MF.getRegInfo();
1702  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1703  if (!MI.getOperand(i).isReg())
1704  continue;
1705  Register Reg = MI.getOperand(i).getReg();
1706  if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
1707  if (Bank->getID() == AMDGPU::VGPRRegBankID)
1708  return false;
1709 
1710  assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
1711  Bank->getID() == AMDGPU::SCCRegBankID);
1712  }
1713  }
1714  return true;
1715 }
1716 
1717 const RegisterBankInfo::InstructionMapping &
1718 AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
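  // Scalar mapping: 1-bit values go to SCC, everything else to the SGPR bank.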
1719  const MachineFunction &MF = *MI.getParent()->getParent();
1720  const MachineRegisterInfo &MRI = MF.getRegInfo();
1721  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
1722 
1723  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1724  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
1725  unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
1726  OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
1727  }
1728  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
1729  MI.getNumOperands());
1730 }
1731 
1732 const RegisterBankInfo::InstructionMapping &
1733 AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
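  // Vector mapping: operands default to VGPRs, with 1-bit values treated as
  // condition bits in VCC. The first source keeps an already-assigned bank,
  // since VALU instructions can typically read one SGPR operand directly.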
1734  const MachineFunction &MF = *MI.getParent()->getParent();
1735  const MachineRegisterInfo &MRI = MF.getRegInfo();
1736  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
1737  unsigned OpdIdx = 0;
1738 
1739  unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1740  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
1741 
1742  if (MI.getOperand(OpdIdx).isIntrinsicID())
1743  OpdsMapping[OpdIdx++] = nullptr;
1744 
1745  Register Reg1 = MI.getOperand(OpdIdx).getReg();
1746  unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
1747 
1748  unsigned DefaultBankID = Size1 == 1 ?
1749  AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
1750  unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
1751 
1752  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
1753 
1754  for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
1755  const MachineOperand &MO = MI.getOperand(OpdIdx);
1756  if (!MO.isReg())
1757  continue;
1758 
1759  unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
1760  unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
1761  OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
1762  }
1763 
1764  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
1765  MI.getNumOperands());
1766 }
1767 
1768 const RegisterBankInfo::InstructionMapping &
1769 AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
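  // Force every register operand, including the result, into VGPRs. Used for
  // operations with no scalar form, such as the DS and atomic cases below.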
1770  const MachineFunction &MF = *MI.getParent()->getParent();
1771  const MachineRegisterInfo &MRI = MF.getRegInfo();
1772  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
1773 
1774  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
1775  const MachineOperand &Op = MI.getOperand(I);
1776  if (!Op.isReg())
1777  continue;
1778 
1779  unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
1780  OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1781  }
1782 
1783  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
1784  MI.getNumOperands());
1785 }
1786 
1787 const RegisterBankInfo::InstructionMapping &
1788 AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
1789  const MachineInstr &MI,
1790  int RsrcIdx) const {
1791  // The reported argument index is relative to the IR intrinsic call arguments,
1792  // so we need to shift by the number of defs and the intrinsic ID.
1793  RsrcIdx += MI.getNumExplicitDefs() + 1;
1794 
1795  const int NumOps = MI.getNumOperands();
1796  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);
1797 
1798  // TODO: Should packed/unpacked D16 difference be reported here as part of
1799  // the value mapping?
1800  for (int I = 0; I != NumOps; ++I) {
1801  if (!MI.getOperand(I).isReg())
1802  continue;
1803 
1804  Register OpReg = MI.getOperand(I).getReg();
1805  unsigned Size = getSizeInBits(OpReg, MRI, *TRI);
1806 
1807  // FIXME: Probably need a new intrinsic register bank searchable table to
1808  // handle arbitrary intrinsics easily.
1809  //
1810  // If this has a sampler, it immediately follows rsrc.
1811  const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;
1812 
1813  if (MustBeSGPR) {
1814  // If this must be an SGPR, we must report whatever it is as legal.
1815  unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
1816  OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
1817  } else {
1818  // Some operands must be VGPR, and these are easy to copy to.
1819  OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1820  }
1821  }
1822 
1823  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
1824 }
1825 
1826 const RegisterBankInfo::InstructionMapping &
1827 AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
1828 
1829  const MachineFunction &MF = *MI.getParent()->getParent();
1830  const MachineRegisterInfo &MRI = MF.getRegInfo();
1831  SmallVector<const ValueMapping*, 2> OpdsMapping(2);
1832  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1833  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
1834  Register PtrReg = MI.getOperand(1).getReg();
1835  LLT PtrTy = MRI.getType(PtrReg);
1836  unsigned AS = PtrTy.getAddressSpace();
1837  unsigned PtrSize = PtrTy.getSizeInBits();
1838 
1839  const ValueMapping *ValMapping;
1840  const ValueMapping *PtrMapping;
1841 
1842  if (isInstrUniformNonExtLoadAlign4(MI) &&
1843  (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
1844  // We have a uniform instruction, so we want to use an SMRD load.
1845  ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1846  PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
1847  } else {
1848  ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
1849  PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
1850  }
1851 
1852  OpdsMapping[0] = ValMapping;
1853  OpdsMapping[1] = PtrMapping;
1854  const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping(
1855  1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
1856  return Mapping;
1857 
1858  // FIXME: Do we want to add a mapping for FLAT load, or should we just
1859  // handle that during instruction selection?
1860 }
1861 
1862 unsigned
1863 AMDGPURegisterBankInfo::getRegBankID(Register Reg,
1864  const MachineRegisterInfo &MRI,
1865  const TargetRegisterInfo &TRI,
1866  unsigned Default) const {
1867 
1868  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
1869  return Bank ? Bank->getID() : Default;
1870 }
1871 
1872 
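// The "union" of two banks: SGPR only if both inputs are SGPR, VGPR otherwise.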
1873 static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
1874  return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
1875  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1876 }
1877 
1878 const RegisterBankInfo::ValueMapping *
1879 AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
1880  const MachineRegisterInfo &MRI,
1881  const TargetRegisterInfo &TRI) const {
1882  // Lie and claim anything is legal, even though this needs to be an SGPR;
1883  // applyMapping will have to deal with it as a waterfall loop.
1884  unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
1885  unsigned Size = getSizeInBits(Reg, MRI, TRI);
1886  return AMDGPU::getValueMapping(Bank, Size);
1887 }
1888 
1889 const RegisterBankInfo::ValueMapping *
1890 AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
1891  const MachineRegisterInfo &MRI,
1892  const TargetRegisterInfo &TRI) const {
1893  unsigned Size = getSizeInBits(Reg, MRI, TRI);
1894  return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1895 }
1896 
1897 ///
1898 /// This function must return a legal mapping, because
1899 /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
1900 /// in RegBankSelect::Mode::Fast. Any mapping that would cause a
1901 /// VGPR-to-SGPR copy to be generated is illegal.
1902 ///
1903 const RegisterBankInfo::InstructionMapping &
1904 AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
1905  const MachineFunction &MF = *MI.getParent()->getParent();
1906  const MachineRegisterInfo &MRI = MF.getRegInfo();
1907 
1908  if (MI.isRegSequence()) {
1909  // If any input is a VGPR, the result must be a VGPR. The default handling
1910  // assumes any copy between banks is legal.
1911  unsigned BankID = AMDGPU::SGPRRegBankID;
1912 
1913  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
1914  auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
1915  // It doesn't make sense to use vcc or scc banks here, so just ignore
1916  // them.
1917  if (OpBank != AMDGPU::SGPRRegBankID) {
1918  BankID = AMDGPU::VGPRRegBankID;
1919  break;
1920  }
1921  }
1922  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1923 
1924  const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
1925  return getInstructionMapping(
1926  1, /*Cost*/ 1,
1927  /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
1928  }
1929 
1930  // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
1931  // properly.
1932  //
1933  // TODO: There are additional exec masking dependencies to analyze.
1934  if (MI.getOpcode() == TargetOpcode::G_PHI) {
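    // Join the incoming banks: any VGPR input forces a VGPR result, SCC is
    // widened to SGPR (there is only one SCC register), and mixing VCC with
    // SGPR inputs falls back to VGPR.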
1935  // TODO: Generate proper invalid bank enum.
1936  int ResultBank = -1;
1937 
1938  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
1939  Register Reg = MI.getOperand(I).getReg();
1940  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
1941 
1942  // FIXME: Assuming VGPR for any undetermined inputs.
1943  if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
1944  ResultBank = AMDGPU::VGPRRegBankID;
1945  break;
1946  }
1947 
1948  unsigned OpBank = Bank->getID();
1949  // scc, scc -> sgpr
1950  if (OpBank == AMDGPU::SCCRegBankID) {
1951  // There's only one SCC register, so a phi requires copying to SGPR.
1952  OpBank = AMDGPU::SGPRRegBankID;
1953  } else if (OpBank == AMDGPU::VCCRegBankID) {
1954  // vcc, vcc -> vcc
1955  // vcc, sgpr -> vgpr
1956  if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
1957  ResultBank = AMDGPU::VGPRRegBankID;
1958  break;
1959  }
1960  }
1961 
1962  ResultBank = OpBank;
1963  }
1964 
1965  assert(ResultBank != -1);
1966 
1967  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1968 
1969  const ValueMapping &ValMap =
1970  getValueMapping(0, Size, getRegBank(ResultBank));
1971  return getInstructionMapping(
1972  1, /*Cost*/ 1,
1973  /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
1974  }
1975 
1976  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
1977  if (Mapping.isValid())
1978  return Mapping;
1979 
1980  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
1981 
1982  switch (MI.getOpcode()) {
1983  default:
1984  return getInvalidInstructionMapping();
1985 
1986  case AMDGPU::G_AND:
1987  case AMDGPU::G_OR:
1988  case AMDGPU::G_XOR: {
1989  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1990  if (Size == 1) {
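      // 1-bit logic ops are bank-sensitive: VCC operands need VALU ops, while
      // SCC/SGPR operands can use 32-bit SALU ops. Derive the result bank from
      // whatever is already known about the destination and sources.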
1991  const RegisterBank *DstBank
1992  = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
1993 
1994  unsigned TargetBankID = -1;
1995  unsigned BankLHS = -1;
1996  unsigned BankRHS = -1;
1997  if (DstBank) {
1998  TargetBankID = DstBank->getID();
1999  if (DstBank == &AMDGPU::VCCRegBank) {
2000  TargetBankID = AMDGPU::VCCRegBankID;
2001  BankLHS = AMDGPU::VCCRegBankID;
2002  BankRHS = AMDGPU::VCCRegBankID;
2003  } else if (DstBank == &AMDGPU::SCCRegBank) {
2004  TargetBankID = AMDGPU::SCCRegBankID;
2005  BankLHS = AMDGPU::SGPRRegBankID;
2006  BankRHS = AMDGPU::SGPRRegBankID;
2007  } else {
2008  BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
2009  AMDGPU::SGPRRegBankID);
2010  BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2011  AMDGPU::SGPRRegBankID);
2012  }
2013  } else {
2014  BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
2015  AMDGPU::VCCRegBankID);
2016  BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2017  AMDGPU::VCCRegBankID);
2018 
2019  // Both inputs should be true booleans to produce a boolean result.
2020  if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
2021  TargetBankID = AMDGPU::VGPRRegBankID;
2022  } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
2023  TargetBankID = AMDGPU::VCCRegBankID;
2024  BankLHS = AMDGPU::VCCRegBankID;
2025  BankRHS = AMDGPU::VCCRegBankID;
2026  } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
2027  TargetBankID = AMDGPU::SGPRRegBankID;
2028  } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
2029  // The operation must be done on a 32-bit register, but it will set
2030  // scc. The result type could interchangeably be SCC or SGPR, since
2031  // both values will be produced.
2032  TargetBankID = AMDGPU::SCCRegBankID;
2033  BankLHS = AMDGPU::SGPRRegBankID;
2034  BankRHS = AMDGPU::SGPRRegBankID;
2035  }
2036  }
2037 
2038  OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
2039  OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
2040  OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
2041  break;
2042  }
2043 
2044  if (Size == 64) {
2045 
2046  if (isSALUMapping(MI)) {
2047  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
2048  OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
2049  } else {
2050  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
2051  unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
2052  OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
2053 
2054  unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
2055  OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
2056  }
2057 
2058  break;
2059  }
2060 
2062  }
2063  case AMDGPU::G_GEP:
2064  case AMDGPU::G_ADD:
2065  case AMDGPU::G_SUB:
2066  case AMDGPU::G_MUL:
2067  case AMDGPU::G_SHL:
2068  case AMDGPU::G_LSHR:
2069  case AMDGPU::G_ASHR:
2070  case AMDGPU::G_UADDO:
2071  case AMDGPU::G_SADDO:
2072  case AMDGPU::G_USUBO:
2073  case AMDGPU::G_SSUBO:
2074  case AMDGPU::G_UADDE:
2075  case AMDGPU::G_SADDE:
2076  case AMDGPU::G_USUBE:
2077  case AMDGPU::G_SSUBE:
2078  case AMDGPU::G_SMIN:
2079  case AMDGPU::G_SMAX:
2080  case AMDGPU::G_UMIN:
2081  case AMDGPU::G_UMAX:
2082  if (isSALUMapping(MI))
2083  return getDefaultMappingSOP(MI);
2084  LLVM_FALLTHROUGH;
2085 
2086  case AMDGPU::G_FADD:
2087  case AMDGPU::G_FSUB:
2088  case AMDGPU::G_FPTOSI:
2089  case AMDGPU::G_FPTOUI:
2090  case AMDGPU::G_FMUL:
2091  case AMDGPU::G_FMA:
2092  case AMDGPU::G_FMAD:
2093  case AMDGPU::G_FSQRT:
2094  case AMDGPU::G_FFLOOR:
2095  case AMDGPU::G_FCEIL:
2096  case AMDGPU::G_FRINT:
2097  case AMDGPU::G_SITOFP:
2098  case AMDGPU::G_UITOFP:
2099  case AMDGPU::G_FPTRUNC:
2100  case AMDGPU::G_FPEXT:
2101  case AMDGPU::G_FEXP2:
2102  case AMDGPU::G_FLOG2:
2103  case AMDGPU::G_FMINNUM:
2104  case AMDGPU::G_FMAXNUM:
2105  case AMDGPU::G_FMINNUM_IEEE:
2106  case AMDGPU::G_FMAXNUM_IEEE:
2107  case AMDGPU::G_FCANONICALIZE:
2108  case AMDGPU::G_INTRINSIC_TRUNC:
2109  case AMDGPU::G_INTRINSIC_ROUND:
2110  return getDefaultMappingVOP(MI);
2111  case AMDGPU::G_UMULH:
2112  case AMDGPU::G_SMULH: {
2113  if (Subtarget.hasScalarMulHiInsts() &&
2114  isSALUMapping(MI))
2115  return getDefaultMappingSOP(MI);
2116  return getDefaultMappingVOP(MI);
2117  }
2118  case AMDGPU::G_IMPLICIT_DEF: {
2119  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2120  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2121  break;
2122  }
2123  case AMDGPU::G_FCONSTANT:
2124  case AMDGPU::G_CONSTANT:
2125  case AMDGPU::G_FRAME_INDEX:
2126  case AMDGPU::G_GLOBAL_VALUE:
2127  case AMDGPU::G_BLOCK_ADDR: {
2128  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2129  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2130  break;
2131  }
2132  case AMDGPU::G_INSERT: {
2133  unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
2134  AMDGPU::VGPRRegBankID;
2135  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
2136  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
2137  unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
2138  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
2139  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
2140  OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
2141  OpdsMapping[3] = nullptr;
2142  break;
2143  }
2144  case AMDGPU::G_EXTRACT: {
2145  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
2146  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
2147  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
2148  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
2149  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
2150  OpdsMapping[2] = nullptr;
2151  break;
2152  }
2153  case AMDGPU::G_BUILD_VECTOR:
2154  case AMDGPU::G_BUILD_VECTOR_TRUNC: {
2155  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2156  if (DstTy == LLT::vector(2, 16)) {
2157  unsigned DstSize = DstTy.getSizeInBits();
2158  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2159  unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
2160  unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2161  unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
2162 
2163  OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
2164  OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
2165  OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
2166  break;
2167  }
2168 
2169  LLVM_FALLTHROUGH;
2170  }
2171  case AMDGPU::G_MERGE_VALUES:
2172  case AMDGPU::G_CONCAT_VECTORS: {
2173  unsigned Bank = isSALUMapping(MI) ?
2174  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
2175  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2176  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2177 
2178  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
2179  // Op1 and Dst should use the same register bank.
2180  for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
2181  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
2182  break;
2183  }
2184  case AMDGPU::G_BITCAST:
2185  case AMDGPU::G_INTTOPTR:
2186  case AMDGPU::G_PTRTOINT:
2187  case AMDGPU::G_CTLZ:
2188  case AMDGPU::G_CTLZ_ZERO_UNDEF:
2189  case AMDGPU::G_CTTZ:
2190  case AMDGPU::G_CTTZ_ZERO_UNDEF:
2191  case AMDGPU::G_CTPOP:
2192  case AMDGPU::G_BSWAP:
2193  case AMDGPU::G_BITREVERSE:
2194  case AMDGPU::G_FABS:
2195  case AMDGPU::G_FNEG: {
2196  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2197  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
2198  OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
2199  break;
2200  }
2201  case AMDGPU::G_TRUNC: {
2202  Register Dst = MI.getOperand(0).getReg();
2203  Register Src = MI.getOperand(1).getReg();
2204  unsigned Bank = getRegBankID(Src, MRI, *TRI);
2205  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
2206  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
2207  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
2208  OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
2209  break;
2210  }
2211  case AMDGPU::G_ZEXT:
2212  case AMDGPU::G_SEXT:
2213  case AMDGPU::G_ANYEXT: {
2214  Register Dst = MI.getOperand(0).getReg();
2215  Register Src = MI.getOperand(1).getReg();
2216  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
2217  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
2218 
2219  unsigned DstBank;
2220  const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
2221  assert(SrcBank);
2222  switch (SrcBank->getID()) {
2223  case AMDGPU::SCCRegBankID:
2224  case AMDGPU::SGPRRegBankID:
2225  DstBank = AMDGPU::SGPRRegBankID;
2226  break;
2227  default:
2228  DstBank = AMDGPU::VGPRRegBankID;
2229  break;
2230  }
2231 
2232  // TODO: Should anyext be split into a 32-bit part as well?
2233  if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
2234  OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
2235  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
2236  } else {
2237  // Scalar extend can use 64-bit BFE, but VGPRs require extending to
2238  // 32-bits, and then to 64.
2239  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
2240  OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
2241  SrcSize);
2242  }
2243  break;
2244  }
2245  case AMDGPU::G_FCMP: {
2246  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2247  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2248  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
2249  OpdsMapping[1] = nullptr; // Predicate Operand.
2250  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
2251  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
2252  break;
2253  }
2254  case AMDGPU::G_STORE: {
2255  assert(MI.getOperand(0).isReg());
2256  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2257  // FIXME: We need to specify a different reg bank once scalar stores
2258  // are supported.
2259  const ValueMapping *ValMapping =
2260  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
2261  // FIXME: Depending on the type of store, the pointer could be in
2262  // the SGPR Reg bank.
2263  // FIXME: Pointer size should be based on the address space.
2264  const ValueMapping *PtrMapping =
2265  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
2266 
2267  OpdsMapping[0] = ValMapping;
2268  OpdsMapping[1] = PtrMapping;
2269  break;
2270  }
2271 
2272  case AMDGPU::G_ICMP: {
2273  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2274  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2275  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2276  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
2277 
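    // A compare can write SCC only if both sources are SGPRs and a scalar
    // compare instruction exists: always for 32 bits, and for 64-bit eq/ne
    // only on subtargets with scalar 64-bit compares.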
2278  bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
2279  Op3Bank == AMDGPU::SGPRRegBankID &&
2280  (Size == 32 || (Size == 64 &&
2281  (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
2282  Subtarget.hasScalarCompareEq64()));
2283 
2284  unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
2285 
2286  OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
2287  OpdsMapping[1] = nullptr; // Predicate Operand.
2288  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
2289  OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
2290  break;
2291  }
2292  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2293  unsigned OutputBankID = isSALUMapping(MI) ?
2294  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
2295  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2296  unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2297  unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2298 
2299  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
2300  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
2301 
2302  // The index can be in either bank if the source vector is a VGPR.
2303  OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2304  break;
2305  }
2306  case AMDGPU::G_INSERT_VECTOR_ELT: {
2307  unsigned OutputBankID = isSALUMapping(MI) ?
2308  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
2309 
2310  unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2311  unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2312  unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
2313  unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2314  unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
2315 
2316  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
2317  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
2318  OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
2319 
2320  // The index can be in either bank if the source vector is a VGPR.
2321  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2322  break;
2323  }
2324  case AMDGPU::G_UNMERGE_VALUES: {
2325  unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
2326  AMDGPU::VGPRRegBankID;
2327 
2328  // Op1 and Dst should use the same register bank.
2329  // FIXME: Shouldn't this be the default? Why do we need to handle this?
2330  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2331  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
2332  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
2333  }
2334  break;
2335  }
2336  case AMDGPU::G_INTRINSIC: {
2337  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
2338  default:
2339  return getInvalidInstructionMapping();
2340  case Intrinsic::amdgcn_div_fmas:
2341  case Intrinsic::amdgcn_trig_preop:
2342  case Intrinsic::amdgcn_sin:
2343  case Intrinsic::amdgcn_cos:
2344  case Intrinsic::amdgcn_log_clamp:
2345  case Intrinsic::amdgcn_rcp:
2346  case Intrinsic::amdgcn_rcp_legacy:
2347  case Intrinsic::amdgcn_rsq:
2348  case Intrinsic::amdgcn_rsq_legacy:
2349  case Intrinsic::amdgcn_rsq_clamp:
2350  case Intrinsic::amdgcn_ldexp:
2351  case Intrinsic::amdgcn_frexp_mant:
2352  case Intrinsic::amdgcn_frexp_exp:
2353  case Intrinsic::amdgcn_fract:
2354  case Intrinsic::amdgcn_cvt_pkrtz:
2355  case Intrinsic::amdgcn_cvt_pknorm_i16:
2356  case Intrinsic::amdgcn_cvt_pknorm_u16:
2357  case Intrinsic::amdgcn_cvt_pk_i16:
2358  case Intrinsic::amdgcn_cvt_pk_u16:
2359  case Intrinsic::amdgcn_fmed3:
2360  case Intrinsic::amdgcn_cubeid:
2361  case Intrinsic::amdgcn_cubema:
2362  case Intrinsic::amdgcn_cubesc:
2363  case Intrinsic::amdgcn_cubetc:
2364  case Intrinsic::amdgcn_sffbh:
2365  case Intrinsic::amdgcn_fmad_ftz:
2366  case Intrinsic::amdgcn_mbcnt_lo:
2367  case Intrinsic::amdgcn_mbcnt_hi:
2368  case Intrinsic::amdgcn_ubfe:
2369  case Intrinsic::amdgcn_sbfe:
2370  case Intrinsic::amdgcn_lerp:
2371  case Intrinsic::amdgcn_sad_u8:
2372  case Intrinsic::amdgcn_msad_u8:
2373  case Intrinsic::amdgcn_sad_hi_u8:
2374  case Intrinsic::amdgcn_sad_u16:
2375  case Intrinsic::amdgcn_qsad_pk_u16_u8:
2376  case Intrinsic::amdgcn_mqsad_pk_u16_u8:
2377  case Intrinsic::amdgcn_mqsad_u32_u8:
2378  case Intrinsic::amdgcn_cvt_pk_u8_f32:
2379  case Intrinsic::amdgcn_alignbit:
2380  case Intrinsic::amdgcn_alignbyte:
2381  case Intrinsic::amdgcn_fdot2:
2382  case Intrinsic::amdgcn_sdot2:
2383  case Intrinsic::amdgcn_udot2:
2384  case Intrinsic::amdgcn_sdot4:
2385  case Intrinsic::amdgcn_udot4:
2386  case Intrinsic::amdgcn_sdot8:
2387  case Intrinsic::amdgcn_udot8:
2388  case Intrinsic::amdgcn_wwm:
2389  case Intrinsic::amdgcn_wqm:
2390  return getDefaultMappingVOP(MI);
2391  case Intrinsic::amdgcn_ds_swizzle:
2392  case Intrinsic::amdgcn_ds_permute:
2393  case Intrinsic::amdgcn_ds_bpermute:
2394  case Intrinsic::amdgcn_update_dpp:
2395  return getDefaultMappingAllVGPR(MI);
2396  case Intrinsic::amdgcn_kernarg_segment_ptr:
2397  case Intrinsic::amdgcn_s_getpc:
2398  case Intrinsic::amdgcn_groupstaticsize: {
2399  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2400  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2401  break;
2402  }
2403  case Intrinsic::amdgcn_wqm_vote: {
2404  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2405  OpdsMapping[0] = OpdsMapping[2]
2406  = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
2407  break;
2408  }
2409  case Intrinsic::amdgcn_s_buffer_load: {
2410  // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
2411  Register RSrc = MI.getOperand(2).getReg(); // SGPR
2412  Register Offset = MI.getOperand(3).getReg(); // SGPR/imm
2413 
2414  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2415  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
2416  unsigned Size3 = MRI.getType(Offset).getSizeInBits();
2417 
2418  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
2419  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
2420 
2421  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
2422  OpdsMapping[1] = nullptr; // intrinsic id
2423 
2424  // Lie and claim everything is legal, even though some need to be
2425  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
2426  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
2427  OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
2428  OpdsMapping[4] = nullptr;
2429  break;
2430  }
2431  case Intrinsic::amdgcn_div_scale: {
2432  unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2433  unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2434  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
2435  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
2436 
2437  unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
2438  OpdsMapping[3] = AMDGPU::getValueMapping(
2439  getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
2440  OpdsMapping[4] = AMDGPU::getValueMapping(
2441  getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
2442 
2443  break;
2444  }
2445  case Intrinsic::amdgcn_class: {
2446  Register Src0Reg = MI.getOperand(2).getReg();
2447  Register Src1Reg = MI.getOperand(3).getReg();
2448  unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
2449  unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
2450  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2451  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
2452  OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
2453  Src0Size);
2454  OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
2455  Src1Size);
2456  break;
2457  }
2458  case Intrinsic::amdgcn_icmp:
2459  case Intrinsic::amdgcn_fcmp: {
2460  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2461  // This is not VCCRegBank because this is not used in boolean contexts.
2462  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2463  unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2464  unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2465  unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
2466  OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
2467  OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
2468  break;
2469  }
2470  case Intrinsic::amdgcn_readlane: {
2471  // This must be an SGPR, but accept a VGPR.
2472  Register IdxReg = MI.getOperand(3).getReg();
2473  unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2474  unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2475  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2476  LLVM_FALLTHROUGH;
2477  }
2478  case Intrinsic::amdgcn_readfirstlane: {
2479  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2480  unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2481  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2482  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2483  break;
2484  }
2485  case Intrinsic::amdgcn_writelane: {
2486  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2487  Register SrcReg = MI.getOperand(2).getReg();
2488  unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
2489  unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2490  Register IdxReg = MI.getOperand(3).getReg();
2491  unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2492  unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2493  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2494 
2495  // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
2496  // to legalize.
2497  OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
2498  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2499  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2500  break;
2501  }
2502  case Intrinsic::amdgcn_if_break: {
2503  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
2504  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2505  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
2506  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2507  break;
2508  }
2509  }
2510  break;
2511  }
2512  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
2513  auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
2514  switch (IntrID) {
2515  case Intrinsic::amdgcn_s_getreg:
2516  case Intrinsic::amdgcn_s_memtime:
2517  case Intrinsic::amdgcn_s_memrealtime:
2518  case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
2519  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2520  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2521  break;
2522  }
2523  case Intrinsic::amdgcn_ds_append:
2524  case Intrinsic::amdgcn_ds_consume:
2525  case Intrinsic::amdgcn_ds_fadd:
2526  case Intrinsic::amdgcn_ds_fmin:
2527  case Intrinsic::amdgcn_ds_fmax:
2528  case Intrinsic::amdgcn_atomic_inc:
2529  case Intrinsic::amdgcn_atomic_dec:
2530  return getDefaultMappingAllVGPR(MI);
2531  case Intrinsic::amdgcn_ds_ordered_add:
2532  case Intrinsic::amdgcn_ds_ordered_swap: {
2533  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2534  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2535  unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2536  AMDGPU::SGPRRegBankID);
2537  OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
2538  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2539  break;
2540  }
2541  case Intrinsic::amdgcn_exp_compr:
2542  OpdsMapping[0] = nullptr; // IntrinsicID
2543  // FIXME: These are immediate values which can't be read from registers.
2544  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2545  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2546  // FIXME: Could we support packed types here?
2547  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2548  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2549  // FIXME: These are immediate values which can't be read from registers.
2550  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2551  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2552  break;
2553  case Intrinsic::amdgcn_exp:
2554  // FIXME: Could we support packed types here?
2555  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2556  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2557  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2558  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2559  break;
2560  case Intrinsic::amdgcn_buffer_load: {
2561  Register RSrc = MI.getOperand(2).getReg(); // SGPR
2562  Register VIndex = MI.getOperand(3).getReg(); // VGPR
2563  Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
2564 
2565  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2566  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
2567  unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
2568  unsigned Size4 = MRI.getType(Offset).getSizeInBits();
2569 
2570  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
2571  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
2572 
2573  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
2574  OpdsMapping[1] = nullptr; // intrinsic id
2575 
2576  // Lie and claim everything is legal, even though some need to be
2577  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
2578  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
2579  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
2580  OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
2581  OpdsMapping[5] = nullptr;
2582  OpdsMapping[6] = nullptr;
2583  break;
2584  }
2585  case Intrinsic::amdgcn_s_sendmsg:
2586  case Intrinsic::amdgcn_s_sendmsghalt: {
2587  // This must be an SGPR, but accept a VGPR.
2588  unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2589  AMDGPU::SGPRRegBankID);
2590  OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
2591  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
2592  break;
2593  }
2594  case Intrinsic::amdgcn_end_cf: {
2595  unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
2596  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2597  break;
2598  }
2599  case Intrinsic::amdgcn_else: {
2600  unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
2601  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
2602  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
2603  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
2604  break;
2605  }
2606  case Intrinsic::amdgcn_kill: {
2607  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
2608  break;
2609  }
2610  case Intrinsic::amdgcn_raw_buffer_load:
2611  case Intrinsic::amdgcn_raw_tbuffer_load: {
2612  // FIXME: Should make the intrinsic ID the last operand of the instruction;
2613  // then this would be the same as a store.
2614  OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
2615  OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
2616  OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
2617  OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
2618  break;
2619  }
2620  case Intrinsic::amdgcn_raw_buffer_store:
2621  case Intrinsic::amdgcn_raw_buffer_store_format:
2622  case Intrinsic::amdgcn_raw_tbuffer_store: {
2623  OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
2624  OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
2625  OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
2626  OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
2627  break;
2628  }
2629  case Intrinsic::amdgcn_struct_buffer_load:
2630  case Intrinsic::amdgcn_struct_tbuffer_load: {
2631  OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
2632  OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
2633  OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
2634  OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
2635  OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
2636  break;
2637  }
2638  case Intrinsic::amdgcn_struct_buffer_store:
2639  case Intrinsic::amdgcn_struct_tbuffer_store: {
2640  OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
2641  OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
2642  OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
2643  OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
2644  OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
2645  break;
2646  }
2647  default:
2648  if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
2649  AMDGPU::lookupRsrcIntrinsic(IntrID)) {
2650  // Non-images can have complications from operands that allow both SGPR
2651  // and VGPR. For now it's too complicated to figure out the final opcode
2652  // to derive the register bank from the MCInstrDesc.
2653  if (RSrcIntrin->IsImage)
2654  return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
2655  }
2656 
2657  return getInvalidInstructionMapping();
2658  }
2659  break;
2660  }
2661  case AMDGPU::G_SELECT: {
2662  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2663  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2664  AMDGPU::SGPRRegBankID);
2665  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
2666  AMDGPU::SGPRRegBankID);
2667  bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
2668  Op3Bank == AMDGPU::SGPRRegBankID;
2669 
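    // With two SGPR value inputs the select can stay scalar with an SCC
    // condition; any VGPR involvement forces a VCC condition and VGPR result.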
2670  unsigned CondBankDefault = SGPRSrcs ?
2671  AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
2672  unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
2673  CondBankDefault);
2674  if (CondBank == AMDGPU::SGPRRegBankID)
2675  CondBank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
2676  else if (CondBank == AMDGPU::VGPRRegBankID)
2677  CondBank = AMDGPU::VCCRegBankID;
2678 
2679  unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SCCRegBankID ?
2680  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
2681 
2682  assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
2683 
2684  if (Size == 64) {
2685  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2686  OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2687  OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2688  OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2689  } else {
2690  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
2691  OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2692  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
2693  OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
2694  }
2695 
2696  break;
2697  }
2698 
2699  case AMDGPU::G_LOAD:
2700  case AMDGPU::G_ZEXTLOAD:
2701  case AMDGPU::G_SEXTLOAD:
2702  return getInstrMappingForLoad(MI);
2703 
2704  case AMDGPU::G_ATOMICRMW_XCHG:
2705  case AMDGPU::G_ATOMICRMW_ADD:
2706  case AMDGPU::G_ATOMICRMW_SUB:
2707  case AMDGPU::G_ATOMICRMW_AND:
2708  case AMDGPU::G_ATOMICRMW_OR:
2709  case AMDGPU::G_ATOMICRMW_XOR:
2710  case AMDGPU::G_ATOMICRMW_MAX:
2711  case AMDGPU::G_ATOMICRMW_MIN:
2712  case AMDGPU::G_ATOMICRMW_UMAX:
2713  case AMDGPU::G_ATOMICRMW_UMIN:
2714  case AMDGPU::G_ATOMICRMW_FADD:
2715  case AMDGPU::G_ATOMIC_CMPXCHG: {
2716  return getDefaultMappingAllVGPR(MI);
2717  }
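  // A branch condition is either in SCC (uniform branch) or VCC (divergent
  // branch); a plain SGPR or VGPR condition bit is mapped as VCC here.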
2718  case AMDGPU::G_BRCOND: {
2719  unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
2720  AMDGPU::SGPRRegBankID);
2721  assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
2722  if (Bank != AMDGPU::SCCRegBankID)
2723  Bank = AMDGPU::VCCRegBankID;
2724 
2725  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
2726  break;
2727  }
2728  }
2729 
2730  return getInstructionMapping(/*ID*/1, /*Cost*/1,
2731  getOperandsMapping(OpdsMapping),
2732  MI.getNumOperands());
2733 }
2734 
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
uint64_t CallInst * C
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Interface definition for SIRegisterInfo.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End...
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
AMDGPU specific subclass of TargetSubtarget.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:510
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
Register getReg(unsigned Idx) const
Get the register for the operand index.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned getScalarSizeInBits() const
unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
Helper class that represents how the value of an instruction may be mapped and what is the related co...
void push_back(const T &Elt)
Definition: SmallVector.h:211
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:385
bool isScalar() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:953
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
LLT getScalarType() const
bool isRegSequence() const
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_OR Op0, Op1.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
uint64_t getSize() const
Return the size in bytes of the memory reference.
const SIInstrInfo * getInstrInfo() const override
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
void setRegBank(unsigned Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
Address space for region memory. (GDS)
Definition: AMDGPU.h:271
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
bool hasScalarMulHiInsts() const
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
bool isIntrinsicID() const
bool isVector() const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
A description of a memory reference used in the backend.
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
mir Rename Register Operands
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
bool isSGPRClass(const TargetRegisterClass *RC) const
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, Optional< unsigned > Flags=None)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
const RegClassOrRegBank & getRegClassOrRegBank(unsigned Reg) const
Return the register bank or register class of Reg.
static void substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)
MachineFunction & getMF()
Getter for the function we currently build.
static unsigned extractDLC(unsigned CachePolicy)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
void setReg(Register Reg)
Change the register this operand corresponds to.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
AMDGPURegisterBankInfo(const GCNSubtarget &STI)
const RegisterBank * RegBank
Register bank where the partial value lives.
void setChangeObserver(GISelChangeObserver &Observer)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
bool hasScalarCompareEq64() const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineRegisterInfo * getMRI()
Getter for MRI.
Abstract class that contains various methods for clients to notify about changes. ...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&... args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:655
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Helper class to build MachineInstr.
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned Length
Length of this mapping in bits.
void setType(unsigned VReg, LLT Ty)
Set the low-level type of VReg to Ty.
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:567
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
static unsigned extractGLC(unsigned CachePolicy)
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:499
unsigned getAddressSpace() const
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
R600 Clause Merge
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isZero(Register Reg, MachineRegisterInfo &MRI)
uint64_t getAlignment() const
Return the minimum known alignment in bytes of the actual memory reference.
Address space for local memory.
Definition: AMDGPU.h:274
bool hasUnpackedD16VMem() const
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:197
bool isValid() const
Check whether this object is valid.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:552
static LLT getHalfSizedType(LLT Ty)
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
static MachineInstr * getOtherVRegDef(const MachineRegisterInfo &MRI, Register Reg, const MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target...
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:111
static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI)
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
Helper struct that represents how a value is mapped through different register banks.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
This file declares the MachineIRBuilder class.
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Instruction has been legalized and the MachineFunction changed.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override
Get a register bank that covers RC.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
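For example, a hedged sketch (MBB, Where, and MI are hypothetical) of moving one instruction between blocks:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"
  using namespace llvm;

  // Detach MI from its current block and reinsert it into MBB before Where.
  static void moveBefore(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Where, MachineInstr &MI) {
    MBB.splice(Where, MI.getParent(), MI.getIterator());
  }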
ConstantMatch m_ICst(int64_t &Cst)
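m_ICst belongs to the MIPatternMatch DSL; a sketch of the usual idiom (function name illustrative):

  #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;
  using namespace MIPatternMatch;

  // Returns true and sets Cst if Reg is defined by an integer constant.
  static bool matchIntConstant(Register Reg, MachineRegisterInfo &MRI,
                               int64_t &Cst) {
    return mi_match(Reg, MRI, m_ICst(Cst));
  }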
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_ADD Op0, Op1.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
void setMBB(MachineBasicBlock &MBB)
Set the insertion point to the end of MBB.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
void setSimpleHint(unsigned VReg, unsigned PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual register.
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
This class provides the information for the target register banks.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr & getMI() const
static unsigned regBankUnion(unsigned RB0, unsigned RB1)
void insert(iterator MBBI, MachineBasicBlock *MBB)
unsigned getIntrinsicID() const
Returns the Intrinsic::ID for this instruction.
unsigned NumBreakDowns
Number of partial mapping to break down this value.
iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
iterator_range< def_instr_iterator > def_instructions(unsigned Reg) const
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:265
operand_type_match m_Reg()
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
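Combining the LLT and createGenericVirtualRegister entries, a brief sketch (names hypothetical):

  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/Support/LowLevelTypeImpl.h"
  using namespace llvm;

  static Register makeV2S16(MachineRegisterInfo &MRI) {
    LLT V2S16 = LLT::vector(2, 16); // two 16-bit elements
    // The new vreg carries only the low-level type; a register class or
    // bank is assigned separately (e.g. via setRegBank/setRegClass).
    return MRI.createGenericVirtualRegister(V2S16);
  }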
Simple wrapper observer that takes several observers, and calls each one for each event...
Register getReg() const
getReg - Returns the register number.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
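addReg/addImm chain on the MachineInstrBuilder returned by buildInstr; a hedged sketch in the style of this file's exec-mask handling (opcode and registers illustrative; the generated AMDGPU enums are assumed in scope, as they are throughout this file):

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

  // Hypothetical: copy the exec mask into SaveExec, operand by operand.
  static void saveExec(llvm::MachineIRBuilder &B, llvm::Register SaveExec) {
    B.buildInstr(llvm::AMDGPU::S_MOV_B64)
        .addDef(SaveExec)            // explicit def
        .addReg(llvm::AMDGPU::EXEC); // physical register use
  }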
static unsigned extractSLC(unsigned CachePolicy)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
unsigned getPredicate() const
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164