LLVM 4.0.0
R600InstrInfo.cpp
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"

R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
  : AMDGPUInstrInfo(ST), RI(), ST(ST) {}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                const DebugLoc &DL, unsigned DestReg,
                                unsigned SrcReg, bool KillSrc) const {
  unsigned VectorComponents = 0;
  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 4;
  } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
        .setIsKill(KillSrc);
  }
}

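#if 0
// Illustrative sketch, not part of the original file: a copy between two
// 128-bit registers takes the VectorComponents == 4 path above and expands
// into four per-channel MOVs. Assuming TII points at this R600InstrInfo and
// I is a valid insertion point in MBB, copying T0_XYZW into T1_XYZW behaves
// roughly like:
const R600RegisterInfo &TRI = TII->getRegisterInfo();
for (unsigned Chan = 0; Chan < 4; ++Chan) {
  unsigned Sub = TRI.getSubRegFromChannel(Chan);
  // MOV T1.<Chan>, T0.<Chan>
  TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
                               TRI.getSubReg(AMDGPU::T1_XYZW, Sub),
                               TRI.getSubReg(AMDGPU::T0_XYZW, Sub));
}
#endif
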
/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) const {
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
                                        E = MBBI->operands_end(); I != E; ++I) {
    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
      return false;
  }
  return true;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default:
    return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
  if (isALUInstr(MI.getOpcode()))
    return true;
  if (isVector(MI) || isCubeOp(MI.getOpcode()))
    return true;
  switch (MI.getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
  return isTransOnly(MI.getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
  return isVectorOnly(MI.getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getParent()->getParent();
  return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
         usesVertexCache(MI.getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getParent()->getParent();
  return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
          usesVertexCache(MI.getOpcode())) ||
         usesTextureCache(MI.getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
  return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
  return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
  if (!isALUInstr(MI.getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                        E = MI.operands_end();
       I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (const auto &Row : SrcSelTable) {
    if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, Row[1]);
    }
  }
  return -1;
}

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr &MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO =
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        MachineOperand &Sel =
            MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
        Result.push_back(std::make_pair(&MO, Sel.getImm()));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI.getOperand(SrcIdx);
    unsigned Reg = MO.getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      MachineOperand &Sel =
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
      Result.push_back(std::make_pair(&MO, Sel.getImm()));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      MachineOperand &Operand =
          MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
      if (Operand.isImm()) {
        Result.push_back(std::make_pair(&MO, Operand.getImm()));
        continue;
      }
      assert(Operand.isGlobal());
    }
    Result.push_back(std::make_pair(&MO, 0));
  }
  return Result;
}

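#if 0
// Illustrative sketch, not part of the original file: getSrcs pairs each
// source operand with the value that disambiguates it. Assuming TII points
// at this R600InstrInfo and MI is an ALU instruction, a GPR source comes
// back paired with 0, an ALU_CONST source with its src*_sel value, and an
// ALU_LITERAL_X source with the literal itself:
for (const auto &Src : TII->getSrcs(MI)) {
  MachineOperand *MO = Src.first;   // the source register operand
  int64_t SelOrImm = Src.second;    // const sel / literal value, else 0
  (void)MO; (void)SelOrImm;
}
#endif
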
std::vector<std::pair<int, unsigned>>
R600InstrInfo::ExtractSrcs(MachineInstr &MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (const auto &Src : getSrcs(MI)) {
    ++i;
    unsigned Reg = Src.first->getReg();
    int Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::make_pair(Index, 0U));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell that this is a PS/PV reg.
      Result.push_back(std::make_pair(255, 0U));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::make_pair(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

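// Illustrative note, not part of the original file: Swizzle() permutes the
// three source slots according to the BankSwizzle value. For example, with
// Src = {A, B, C}, ALU_VEC_021_SCL_122 yields {A, C, B} (one swap of slots
// 1 and 2) and ALU_VEC_210 yields {C, B, A} (one swap of slots 0 and 2).
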
static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
  }
}

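// Illustrative note, not part of the original file: getTransSwizzle answers
// "in which cycle does trans operand Op read its bank?". For instance, under
// ALU_VEC_012_SCL_210 the table is {2, 1, 0}: src0 is read in cycle 2, src1
// in cycle 1 and src2 in cycle 0.
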
/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return false;
        }
        // OQAP does not count towards the normal read port restrictions.
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check the Trans ALU.
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic terms) swizzle sequence assuming that all swizzles after
/// Idx can be skipped.
static bool
NextPossibleSolution(
       std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
       unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx --;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

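// Illustrative note, not part of the original file: the candidates advance
// like an odometer over ALU_VEC_012_SCL_210 < ALU_VEC_021_SCL_122 < ... <
// ALU_VEC_210. When isLegalUpTo fails at index Idx, the search walks left
// past any entries already at ALU_VEC_210, increments the first entry that
// can still grow, and resets everything to its right to ALU_VEC_012_SCL_210,
// so whole families of sequences that would fail at the same position are
// skipped in one step.
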
/// Enumerate all possible Swizzle sequences to find one that can meet all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
/// read a const, and can't read a gpr at cycle 1 if they read 2 consts.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  // TransALU can't read 3 constants.
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

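// Illustrative note, not part of the original file: with ConstCount == 2 the
// trans slot spends cycles 0 and 1 fetching constants, so any GPR source
// whose cycle (per getTransSwizzle) is 0 or 1 makes the swizzle incompatible;
// only a GPR read in cycle 2 can coexist with two constant reads.
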
bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0 - src1 operand.

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
                           IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = std::move(IGSrcs.back());
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}


bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert (Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

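// Illustrative note, not part of the original file: each Consts entry encodes
// (index << 2) | chan, so "& (~3)" recovers the constant index while bit 1 of
// the channel selects the [XY] or [ZW] half of the 128-bit KCache line. The
// loop accepts a group that touches at most two distinct half-lines: e.g.
// reading KC0[0].x and KC0[0].y uses one half-line, adding KC0[8].z uses a
// second, and a further KC0[16].w would need a third and fail.
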
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr &MI = *MIs[i];
    if (!isALUInstr(MI.getOpcode()))
      continue;

    for (const auto &Src : getSrcs(MI)) {
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *
R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
  const InstrItineraryData *II = STI.getInstrItineraryData();
  return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr &MI = *I;
    if (isPredicateSetter(MI.getOpcode()))
      return &MI;
  }

  return nullptr;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
         Opcode == AMDGPU::BRANCH_COND_f32;
}

bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
  // Most of the following comes from the ARM implementation of analyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled.
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(I->getOpcode())) {
    return false;
  }

  // Remove successive JUMPs.
  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
    MachineBasicBlock::iterator PriorI = std::prev(I);
    if (AllowModify)
      I->removeFromParent();
    I = PriorI;
  }
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst.getOpcode();
  if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      auto predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr &SecondLastInst = *I;
  unsigned SecondLastOpc = SecondLastInst.getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    auto predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst.getOperand(0).getMBB();
    FBB = LastInst.getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return It.getReverse();
  }
  return MBB.end();
}

unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
                                     const DebugLoc &DL,
                                     int *BytesAdded) const {
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert(!BytesAdded && "code size not handled");

  if (!FBB) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(*PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(*PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                     int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.
  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
  int idx = MI.findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI.getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated. Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI.getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB then the MBB contains
    // more than one clause; we can't predicate several clauses.
    if (MI.getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm.
    return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
  } else if (isVector(MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}


bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   BranchProbability Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   BranchProbability Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         BranchProbability Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}


bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case AMDGPU::PRED_SETE_INT:
    MO.setImm(AMDGPU::PRED_SETNE_INT);
    break;
  case AMDGPU::PRED_SETNE_INT:
    MO.setImm(AMDGPU::PRED_SETE_INT);
    break;
  case AMDGPU::PRED_SETE:
    MO.setImm(AMDGPU::PRED_SETNE);
    break;
  case AMDGPU::PRED_SETNE:
    MO.setImm(AMDGPU::PRED_SETE);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
                                     std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI.getOpcode());
}


bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
                                         ArrayRef<MachineOperand> Pred) const {
  int PIdx = MI.findFirstPredOperandIdx();

  if (MI.getOpcode() == AMDGPU::CF_ALU) {
    MI.getOperand(8).setImm(0);
    return true;
  }

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *,
                                            const MachineInstr &,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: {
    MachineBasicBlock *MBB = MI.getParent();
    int OffsetOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
    // addr is a custom operand with multiple MI operands, and only the
    // first MI operand is given a name.
    int RegOpIdx = OffsetOpIdx + 1;
    int ChanOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
    if (isRegisterLoad(MI)) {
      int DstOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
                      getIndirectAddrRegClass()->getRegister(Address));
      } else {
        buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address,
                          OffsetReg);
      }
    } else if (isRegisterStore(MI)) {
      int ValOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                      MI.getOperand(ValOpIdx).getReg());
      } else {
        buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(),
                           calculateIndirectAddress(RegIndex, Channel),
                           OffsetReg);
      }
    } else {
      return false;
    }

    MBB->erase(MI);
    return true;
  }
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
                      RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
                      MI.getOperand(2).getReg(),
                      RI.getHWRegChan(MI.getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI.getOperand(1).getReg()),   // Address
                       MI.getOperand(3).getReg(),                     // Offset
                       RI.getHWRegChan(MI.getOperand(1).getReg()));   // Channel
    break;
  }
  MI.eraseFromParent();
  return true;
}

void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
                                            LI != LE; ++LI) {
    unsigned Reg = LI->first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
         ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Variable sized objects are not supported.
  if (MFI.hasVarSizedObjects()) {
    return -1;
  }

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned IgnoredFrameReg;
  Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);             // $dst

  if (Src1Reg) {
    MIB.addImm(0)        // $update_exec_mask
       .addImm(0);       // $update_predicate
  }
  MIB.addImm(1)          // $write
     .addImm(0)          // $omod
     .addImm(0)          // $dst_rel
     .addImm(0)          // $dst_clamp
     .addReg(Src0Reg)    // $src0
     .addImm(0)          // $src0_neg
     .addImm(0)          // $src0_rel
     .addImm(0)          // $src0_abs
     .addImm(-1);        // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg)  // $src1
       .addImm(0)        // $src1_neg
       .addImm(0)        // $src1_rel
       .addImm(0)        // $src1_abs
       .addImm(-1);      // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)          // $last
     .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
     .addImm(0)          // $literal
     .addImm(0);         // $bank_swizzle

  return MIB;
}

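#if 0
// Illustrative sketch, not part of the original file: buildDefaultInstruction
// pre-populates every ALU modifier operand (write, omod, rel, clamp, neg,
// abs, sel, last, pred_sel, literal, bank_swizzle) with neutral defaults, so
// callers only adjust what they need. Assuming TII points at this
// R600InstrInfo, I is an insertion point in MBB, and DstReg/Src0Reg/Src1Reg
// are hypothetical registers:
MachineInstr *Add = TII->buildDefaultInstruction(MBB, I, AMDGPU::ADD,
                                                 DstReg, Src0Reg, Src1Reg);
TII->setImmOperand(*Add, AMDGPU::OpName::clamp, 1); // then enable clamping
#endif
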
#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

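// Illustrative note, not part of the original file: DOT_4 carries one copy of
// each ALU operand per lane, suffixed _X/_Y/_Z/_W, and getSlotedOps maps a
// generic operand name plus a slot index to the lane-specific name, e.g.
//   getSlotedOps(AMDGPU::OpName::src0, 2) == AMDGPU::OpName::src0_Z
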
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  if (ST.getGeneration() <= R600Subtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MBB.end();
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(*MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

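#if 0
// Illustrative sketch, not part of the original file: materializing an
// immediate goes through ALU_LITERAL_X, with the value carried by the
// literal operand. Assuming TII points at this R600InstrInfo, I is an
// insertion point in BB, and DstReg is a hypothetical register:
MachineInstr *Mov = TII->buildMovImm(BB, I, DstReg, 42);
// Mov is "MOV DstReg, ALU_LITERAL_X" with its literal operand set to 42.
#endif
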
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI.getOperand(Idx).isImm());
  MI.getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
        break;
      case 2:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
        break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
        break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI.getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}