//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"

R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
  : AMDGPUInstrInfo(ST), RI(), ST(ST) {}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

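/// Copy between physical registers. 128-bit and 64-bit vector copies are
/// expanded into one MOV per channel, each carrying an implicit def of the
/// full destination register. Roughly, a 128-bit copy of T1 into T0 becomes:
///   MOV T0.X, T1.X;  MOV T0.Y, T1.Y;  MOV T0.Z, T1.Z;  MOV T0.W, T1.W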
void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                const DebugLoc &DL, unsigned DestReg,
                                unsigned SrcReg, bool KillSrc) const {
  unsigned VectorComponents = 0;
  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
      AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 4;
  } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
                      .setIsKill(KillSrc);
  }
}

/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) const {
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
                                        E = MBBI->operands_end(); I != E; ++I) {
    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
      return false;
  }
  return true;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default:
    return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
  if (isALUInstr(MI.getOpcode()))
    return true;
  if (isVector(MI) || isCubeOp(MI.getOpcode()))
    return true;
  switch (MI.getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
  return isTransOnly(MI.getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
  return isVectorOnly(MI.getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getParent()->getParent();
  return !AMDGPU::isCompute(MF->getFunction().getCallingConv()) &&
         usesVertexCache(MI.getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getParent()->getParent();
  return (AMDGPU::isCompute(MF->getFunction().getCallingConv()) &&
          usesVertexCache(MI.getOpcode())) ||
          usesTextureCache(MI.getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
  return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
  return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
  if (!isALUInstr(MI.getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                        E = MI.operands_end();
       I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (const auto &Row : SrcSelTable) {
    if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, Row[1]);
    }
  }
  return -1;
}

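/// Collect, for every source operand of \p MI, an (operand, selector) pair.
/// For an ALU_CONST source the second element is the kcache sel immediate,
/// for an ALU_LITERAL_X source it is the literal value itself, and for a
/// plain register source it is 0.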
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr &MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO =
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        MachineOperand &Sel =
            MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
        Result.push_back(std::make_pair(&MO, Sel.getImm()));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI.getOperand(SrcIdx);
    unsigned Reg = MO.getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      MachineOperand &Sel =
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
      Result.push_back(std::make_pair(&MO, Sel.getImm()));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      MachineOperand &Operand =
          MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
      if (Operand.isImm()) {
        Result.push_back(std::make_pair(&MO, Operand.getImm()));
        continue;
      }
      assert(Operand.isGlobal());
    }
    Result.push_back(std::make_pair(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned>>
R600InstrInfo::ExtractSrcs(MachineInstr &MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned>> Result;
  unsigned i = 0;
  for (const auto &Src : getSrcs(MI)) {
    ++i;
    unsigned Reg = Src.first->getReg();
    int Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::make_pair(Index, 0U));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell it's a PS/PV reg
      Result.push_back(std::make_pair(255, 0U));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::make_pair(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

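// In the (index, channel) pairs built by ExtractSrcs and consumed below,
// -1 marks an unused source slot, 255 marks a PS/PV access, and constant
// reads (hardware index above 127) are replaced by the dummy pair after
// being counted in ConstCount.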
static std::vector<std::pair<int, unsigned>>
Swizzle(std::vector<std::pair<int, unsigned>> Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

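/// Map an operand position of a Trans-slot instruction to the cycle in which
/// it is read under swizzle \p Swz. The SCL_xyz suffix of each vector swizzle
/// name spells out this cycle pattern, e.g. ALU_VEC_012_SCL_210 reads trans
/// operands 0, 1, 2 in cycles 2, 1, 0.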
static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
  }
}

/// Returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
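/// Vector[Chan][Cycle] below records which register index is read on a given
/// bank and cycle; a group becomes illegal as soon as two different indices
/// would have to occupy the same (bank, cycle) read-port slot.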
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned>> &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned>> &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
            // The value from output queue A (denoted by register OQAP) can
            // only be fetched during the first cycle.
            return false;
        }
        // OQAP does not count towards the normal read port restrictions
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check Trans Alu
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic terms) swizzle sequence assuming that all swizzles after
/// Idx can be skipped.
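/// For example, with the order VEC_012 < ... < VEC_210, the candidate
/// {VEC_012, VEC_210, VEC_210} with Idx = 2 advances to
/// {VEC_021, VEC_012, VEC_012}, much like incrementing a base-6 number.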
static bool
NextPossibleSolution(std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
                     unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx --;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible Swizzle sequences to find one that can meet all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned>> &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
/// a const, and can't read a gpr at cycle 1 if they read 2 const.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned>> &TransOps,
                  unsigned ConstCount) {
  // TransALU can't read 3 constants
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0 - src1 operand

  std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
                            IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned>> TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = std::move(IGSrcs.back());
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}

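/// An instruction group may read at most two distinct constant "half pairs":
/// bit 1 of each entry selects the XY or ZW half of a kcache line and the
/// bits above select the line; Pair1/Pair2 track the first two distinct
/// combinations seen, and any third distinct combination rejects the group.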
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert (Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr &MI = *MIs[i];
    if (!isALUInstr(MI.getOpcode()))
      continue;

    for (const auto &Src : getSrcs(MI)) {
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *
R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
  const InstrItineraryData *II = STI.getInstrItineraryData();
  return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr &MI = *I;
    if (isPredicateSetter(MI.getOpcode()))
      return &MI;
  }

  return nullptr;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
      Opcode == AMDGPU::BRANCH_COND_f32;
}

bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
  // Most of the following comes from the ARM implementation of analyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(I->getOpcode())) {
    return false;
  }

  // Remove successive JUMP
  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
    MachineBasicBlock::iterator PriorI = std::prev(I);
    if (AllowModify)
      I->removeFromParent();
    I = PriorI;
  }
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst.getOpcode();
  if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      auto predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr &SecondLastInst = *I;
  unsigned SecondLastOpc = SecondLastInst.getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    auto predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst.getOperand(0).getMBB();
    FBB = LastInst.getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

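/// Find the last CF_ALU or CF_ALU_PUSH_BEFORE clause in \p MBB, or return
/// MBB.end() if the block contains none.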
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return It.getReverse();
  }
  return MBB.end();
}

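/// A branch condition built by analyzeBranch is a triple of operands taken
/// from the PRED_X setter: Cond[1] carries the predicate opcode immediate
/// (one of the PRED_SET* values flipped by reverseBranchCondition) and
/// Cond[2] the PRED_SEL_* register selecting the branch sense.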
unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
                                     const DebugLoc &DL,
                                     int *BytesAdded) const {
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert(!BytesAdded && "code size not handled");

  if (!FBB) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(*PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(*PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                     int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.
  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
  int idx = MI.findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI.getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated. Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI.getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB then the MBB has more
    // than one clause, and we are unable to predicate several clauses.
    if (MI.getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
  } else if (isVector(MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   BranchProbability Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   BranchProbability Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         BranchProbability Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case AMDGPU::PRED_SETE_INT:
    MO.setImm(AMDGPU::PRED_SETNE_INT);
    break;
  case AMDGPU::PRED_SETNE_INT:
    MO.setImm(AMDGPU::PRED_SETE_INT);
    break;
  case AMDGPU::PRED_SETE:
    MO.setImm(AMDGPU::PRED_SETNE);
    break;
  case AMDGPU::PRED_SETNE:
    MO.setImm(AMDGPU::PRED_SETE);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
                                     std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI.getOpcode());
}

bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
                                         ArrayRef<MachineOperand> Pred) const {
  int PIdx = MI.findFirstPredOperandIdx();

  if (MI.getOpcode() == AMDGPU::CF_ALU) {
    MI.getOperand(8).setImm(0);
    return true;
  }

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr &,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: {
    MachineBasicBlock *MBB = MI.getParent();
    int OffsetOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
    // addr is a custom operand with multiple MI operands, and only the
    // first MI operand is given a name.
    int RegOpIdx = OffsetOpIdx + 1;
    int ChanOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
    if (isRegisterLoad(MI)) {
      int DstOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
                      getIndirectAddrRegClass()->getRegister(Address));
      } else {
        buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address,
                          OffsetReg);
      }
    } else if (isRegisterStore(MI)) {
      int ValOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                      MI.getOperand(ValOpIdx).getReg());
      } else {
        buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(),
                           calculateIndirectAddress(RegIndex, Channel),
                           OffsetReg);
      }
    } else {
      return false;
    }

    MBB->erase(MI);
    return true;
  }
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
                      RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
                      MI.getOperand(2).getReg(),
                      RI.getHWRegChan(MI.getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI.getOperand(1).getReg()),   // Address
                       MI.getOperand(3).getReg(),                     // Offset
                       RI.getHWRegChan(MI.getOperand(1).getReg()));   // Channel
    break;
  }
  MI.eraseFromParent();
  return true;
}

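/// Reserve every register that indirect addressing may touch: one TReg32
/// channel per stack-width lane for each indirect index used by \p MF.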
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF,
                                             const R600RegisterInfo &TRI) const {
  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      TRI.reserveRegisterTuples(Reserved, Reg);
    }
  }
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                              RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                              RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (std::pair<unsigned, unsigned> LI : MRI.liveins()) {
    unsigned Reg = LI.first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
                      ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Variable sized objects are not supported
  if (MFI.hasVarSizedObjects()) {
    return -1;
  }

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned IgnoredFrameReg;
  Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

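/// Build an ALU instruction with every modifier operand (neg/abs/rel/sel,
/// omod, clamp, pred_sel, bank_swizzle, ...) set to its default value.
/// Callers then adjust individual operands, as buildMovImm does below:
///   MachineInstr *Mov = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
///                                               AMDGPU::ALU_LITERAL_X);
///   setImmOperand(*Mov, AMDGPU::OpName::literal, Imm);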
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);            // $dst

  if (Src1Reg) {
    MIB.addImm(0)       // $update_exec_mask
       .addImm(0);      // $update_predicate
  }
  MIB.addImm(1)         // $write
     .addImm(0)         // $omod
     .addImm(0)         // $dst_rel
     .addImm(0)         // $dst_clamp
     .addReg(Src0Reg)   // $src0
     .addImm(0)         // $src0_neg
     .addImm(0)         // $src0_rel
     .addImm(0)         // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1, once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)         // $last
     .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
     .addImm(0)         // $literal
     .addImm(0);        // $bank_swizzle

  return MIB;
}

#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

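// getSlotedOps maps a scalar operand name to its per-slot variant for DOT_4,
// e.g. src0 with Slot == 2 yields src0_Z.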
static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  if (ST.getGeneration() <= R600Subtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(*MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI.getOperand(Idx).isImm());
  MI.getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

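// For instructions without native operands, all per-operand flags live in a
// single immediate: operand N's flags occupy the NUM_MO_FLAGS bits starting
// at bit N * NUM_MO_FLAGS, which is how addFlag and clearFlag shift their
// masks below.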
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
        break;
      case 2:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
        break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
        break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI.getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}

unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
    PseudoSourceValue::PSVKind Kind) const {
  switch (Kind) {
  case PseudoSourceValue::Stack:
  case PseudoSourceValue::FixedStack:
    return AMDGPUASI.PRIVATE_ADDRESS;
  case PseudoSourceValue::ConstantPool:
  case PseudoSourceValue::GOT:
  case PseudoSourceValue::JumpTable:
  case PseudoSourceValue::GlobalValueCallEntry:
  case PseudoSourceValue::ExternalSymbolCallEntry:
  case PseudoSourceValue::TargetCustom:
    return AMDGPUASI.CONSTANT_ADDRESS;
  }
  llvm_unreachable("Invalid pseudo source kind");
  return AMDGPUASI.PRIVATE_ADDRESS;
}