//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

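// Pull in the TableGen-generated DFA packetizer tables; they back the
// CreateTargetScheduleState() implementation below.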
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"

R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI() {}

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
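  // Copies of 64/128-bit vector registers are expanded into one 32-bit MOV
  // per channel; each MOV carries an implicit def of the full DestReg.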
  unsigned VectorComponents = 0;
  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
      AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 4;
  } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
            AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
                                    .setIsKill(KillSrc);
  }
}

/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) const {
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
                                        E = MBBI->operands_end(); I != E; ++I) {
    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
      return false;
  }
  return true;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
  if (isALUInstr(MI->getOpcode()))
    return true;
  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
    return true;
  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
  return isVectorOnly(MI->getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const MachineFunction *MF = MI->getParent()->getParent();
  const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
  return MFI->getShaderType() != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const MachineFunction *MF = MI->getParent()->getParent();
  const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
  return (MFI->getShaderType() == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
          usesTextureCache(MI->getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
  if (!isALUInstr(MI->getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
                                        E = MI->operands_end(); I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
  static const unsigned OpTable[] = {
    AMDGPU::OpName::src0,
    AMDGPU::OpName::src1,
    AMDGPU::OpName::src2
  };

  assert (SrcNum < 3);
  return getOperandIdx(Opcode, OpTable[SrcNum]);
}

int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (const auto &Row : SrcSelTable) {
    if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, Row[1]);
    }
  }
  return -1;
}

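/// Return the sources of \p MI paired with a selector value: the constant
/// buffer selector for ALU_CONST sources, the literal for ALU_LITERAL_X
/// sources, and 0 for plain registers.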
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MI->getOperand(SrcIdx).getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

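/// Flatten the sources of \p MI into (register index, channel) pairs for the
/// read-port checks below; index 255 marks a PV/PS-forwarded value, -1 an
/// unused slot, and indices above 127 are counted as constants.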
std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::pair<int, unsigned>(Index, 0));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell it's a PS/PV reg
      Result.push_back(std::pair<int, unsigned>(255, 0));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

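/// Apply the source permutation selected by the bank swizzle \p Swz to the
/// three source slots.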
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

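/// Return the read cycle in which trans-slot operand \p Op is fetched under
/// swizzle \p Swz; the SCL_* suffix of each mode spells out this cycle
/// assignment.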
static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}

/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
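  // Vector[Chan][Cycle] records which register index is fetched on bank
  // channel Chan during read cycle Cycle; seeing a second, different index
  // in the same cell is a read-port conflict.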
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return false;
        }
        // OQAP does not count towards the normal read port restrictions
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check Trans Alu
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic terms) swizzle sequence, assuming that all swizzles
/// after Idx can be skipped.
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx --;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible swizzle sequences to find one that can meet all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
/// read a const, and can't read a gpr at cycle 1 if they read 2 consts.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  // TransALU can't read 3 constants
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0 - src1 operand

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
        AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
        IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = std::move(IGSrcs.back());
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}

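/// An instruction group can only access 2 channel pairs (either [XY] or [ZW])
/// from a KCache bank on R700+; the two "half pairs" seen so far are tracked
/// in Pair1 and Pair2 below.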
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert (Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *
R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
  const InstrItineraryData *II = STI.getInstrItineraryData();
  return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return nullptr;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
      Opcode == AMDGPU::BRANCH_COND_f32;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Remove successive JUMP
  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
    MachineBasicBlock::iterator PriorI = std::prev(I);
    if (AllowModify)
      I->removeFromParent();
    I = PriorI;
  }
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
      It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return std::prev(It.base());
  }
  return MBB.end();
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            ArrayRef<MachineOperand> Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated. Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB then the MBB has more
    // than one clause; predicating several clauses is not supported.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
                                 ArrayRef<MachineOperand> Pred2) const {
  return false;
}

bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    ArrayRef<MachineOperand> Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  switch(MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
                      RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
                      MI->getOperand(2).getReg(),
                      RI.getHWRegChan(MI->getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI->getOperand(1).getReg()),  // Address
                       MI->getOperand(3).getReg(),                    // Offset
                       RI.getHWRegChan(MI->getOperand(1).getReg()));  // Channel
    break;
  }
  MI->eraseFromParent();
  return true;
}

void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      MF.getSubtarget().getFrameLowering());

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
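  // Load the offset into the address register AR_X, then mark the MOV's
  // destination as relative (dst_rel) so the write is indirected through AR_X.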
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
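  // Symmetric to the indirect write: AR_X holds the offset and the MOV's
  // source is marked relative (src0_rel) so the read goes through AR_X.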
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);           // $dst

  if (Src1Reg) {
    MIB.addImm(0)     // $update_exec_mask
       .addImm(0);    // $update_predicate
  }
  MIB.addImm(1)        // $write
     .addImm(0)        // $omod
     .addImm(0)        // $dst_rel
     .addImm(0)        // $dst_clamp
     .addReg(Src0Reg)  // $src0
     .addImm(0)        // $src0_neg
     .addImm(0)        // $src0_rel
     .addImm(0)        // $src0_abs
     .addImm(-1);      // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  //XXX: The r600g finalizer expects this to be 1, once we've moved the
  //scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)        // $last
      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
      .addImm(0)        // $literal
      .addImm(0);       // $bank_swizzle

  return MIB;
}

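// Map a shared operand name (e.g. src0) to the DOT_4 per-slot variant
// (src0_X/_Y/_Z/_W) selected by Slot.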
#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

1314  unsigned DstReg, unsigned SrcReg) const {
1315  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
1316 }
1317 
1318 int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
1319  return getOperandIdx(MI.getOpcode(), Op);
1320 }
1321 
1322 int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
1323  return AMDGPU::getNamedOperandIdx(Opcode, Op);
1324 }
1325 
void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

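// For instructions without native flag operands, the MO_FLAG_* bits of all
// operands are packed into a single flag immediate, NUM_MO_FLAGS bits per
// operand, which is why addFlag/clearFlag below shift by NUM_MO_FLAGS * Operand.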
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}