1 //===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI Implementation of TargetInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "SIInstrInfo.h"
17 #include "AMDGPUTargetMachine.h"
18 #include "SIDefines.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/CodeGen/RegisterScavenging.h"
25 #include "llvm/MC/MCInstrDesc.h"
26 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
31  : AMDGPUInstrInfo(st), RI() {}
32 
33 //===----------------------------------------------------------------------===//
34 // TargetInstrInfo callbacks
35 //===----------------------------------------------------------------------===//
36 
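// Returns the number of operands on \p Node, not counting any trailing glue
// operands.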
37 static unsigned getNumOperandsNoGlue(SDNode *Node) {
38  unsigned N = Node->getNumOperands();
39  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
40  --N;
41  return N;
42 }
43 
44 static SDValue findChainOperand(SDNode *Load) {
45  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
46  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
47  return LastOp;
48 }
49 
50 /// \brief Returns true if both nodes have the same value for the given
51 /// operand \p OpName, or if neither node has this operand.
52 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
53  unsigned Opc0 = N0->getMachineOpcode();
54  unsigned Opc1 = N1->getMachineOpcode();
55 
56  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
57  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
58 
59  if (Op0Idx == -1 && Op1Idx == -1)
60  return true;
61 
62 
63  if ((Op0Idx == -1 && Op1Idx != -1) ||
64  (Op1Idx == -1 && Op0Idx != -1))
65  return false;
66 
67  // getNamedOperandIdx returns the index for the MachineInstr's operands,
68  // which includes the result as the first operand. We are indexing into the
69  // MachineSDNode's operands, so we need to skip the result operand to get
70  // the real index.
71  --Op0Idx;
72  --Op1Idx;
73 
74  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
75 }
76 
78  AliasAnalysis *AA) const {
79  // TODO: The generic check fails for VALU instructions that should be
80  // rematerializable due to implicit reads of exec. We really want all of the
81  // generic logic for this except for the implicit exec reads.
82  switch (MI->getOpcode()) {
83  case AMDGPU::V_MOV_B32_e32:
84  case AMDGPU::V_MOV_B32_e64:
85  return true;
86  default:
87  return false;
88  }
89 }
90 
91 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
92  int64_t &Offset0,
93  int64_t &Offset1) const {
94  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
95  return false;
96 
97  unsigned Opc0 = Load0->getMachineOpcode();
98  unsigned Opc1 = Load1->getMachineOpcode();
99 
100  // Make sure both are actually loads.
101  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
102  return false;
103 
104  if (isDS(Opc0) && isDS(Opc1)) {
105 
106  // FIXME: Handle this case:
107  if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
108  return false;
109 
110  // Check base reg.
111  if (Load0->getOperand(1) != Load1->getOperand(1))
112  return false;
113 
114  // Check chain.
115  if (findChainOperand(Load0) != findChainOperand(Load1))
116  return false;
117 
118  // Skip read2 / write2 variants for simplicity.
119  // TODO: We should report true if the used offsets are adjacent (excluding
120  // the st64 versions).
121  if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
122  AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
123  return false;
124 
125  Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
126  Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
127  return true;
128  }
129 
130  if (isSMRD(Opc0) && isSMRD(Opc1)) {
131  assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
132 
133  // Check base reg.
134  if (Load0->getOperand(0) != Load1->getOperand(0))
135  return false;
136 
137  const ConstantSDNode *Load0Offset =
138  dyn_cast<ConstantSDNode>(Load0->getOperand(1));
139  const ConstantSDNode *Load1Offset =
140  dyn_cast<ConstantSDNode>(Load1->getOperand(1));
141 
142  if (!Load0Offset || !Load1Offset)
143  return false;
144 
145  // Check chain.
146  if (findChainOperand(Load0) != findChainOperand(Load1))
147  return false;
148 
149  Offset0 = Load0Offset->getZExtValue();
150  Offset1 = Load1Offset->getZExtValue();
151  return true;
152  }
153 
154  // MUBUF and MTBUF can access the same addresses.
155  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
156 
157  // MUBUF and MTBUF have vaddr at different indices.
158  if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
159  findChainOperand(Load0) != findChainOperand(Load1) ||
160  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
161  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
162  return false;
163 
164  int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
165  int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
166 
167  if (OffIdx0 == -1 || OffIdx1 == -1)
168  return false;
169 
170  // getNamedOperandIdx returns the index for MachineInstrs. Since they
171  // include the output in the operand list, but SDNodes don't, we need to
172  // subtract one from the index.
173  --OffIdx0;
174  --OffIdx1;
175 
176  SDValue Off0 = Load0->getOperand(OffIdx0);
177  SDValue Off1 = Load1->getOperand(OffIdx1);
178 
179  // The offset might be a FrameIndexSDNode.
180  if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
181  return false;
182 
183  Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
184  Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
185  return true;
186  }
187 
188  return false;
189 }
190 
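// The read2st64 / write2st64 forms address elements with a stride of 64, so
// their per-element offsets must be scaled by an extra factor of 64.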
191 static bool isStride64(unsigned Opc) {
192  switch (Opc) {
193  case AMDGPU::DS_READ2ST64_B32:
194  case AMDGPU::DS_READ2ST64_B64:
195  case AMDGPU::DS_WRITE2ST64_B32:
196  case AMDGPU::DS_WRITE2ST64_B64:
197  return true;
198  default:
199  return false;
200  }
201 }
202 
203 bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
204  unsigned &Offset,
205  const TargetRegisterInfo *TRI) const {
206  unsigned Opc = LdSt->getOpcode();
207  if (isDS(Opc)) {
208  const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
209  AMDGPU::OpName::offset);
210  if (OffsetImm) {
211  // Normal, single offset LDS instruction.
212  const MachineOperand *AddrReg = getNamedOperand(*LdSt,
213  AMDGPU::OpName::addr);
214 
215  BaseReg = AddrReg->getReg();
216  Offset = OffsetImm->getImm();
217  return true;
218  }
219 
220  // The 2 offset instructions use offset0 and offset1 instead. We can treat
221  // these as a load with a single offset if the 2 offsets are consecutive. We
222  // will use this for some partially aligned loads.
223  const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
224  AMDGPU::OpName::offset0);
225  const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
226  AMDGPU::OpName::offset1);
227 
228  uint8_t Offset0 = Offset0Imm->getImm();
229  uint8_t Offset1 = Offset1Imm->getImm();
230 
231  if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
232  // Each of these offsets is in element sized units, so we need to convert
233  // to bytes of the individual reads.
234 
235  unsigned EltSize;
236  if (LdSt->mayLoad())
237  EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
238  else {
239  assert(LdSt->mayStore());
240  int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
241  EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
242  }
243 
244  if (isStride64(Opc))
245  EltSize *= 64;
246 
247  const MachineOperand *AddrReg = getNamedOperand(*LdSt,
248  AMDGPU::OpName::addr);
249  BaseReg = AddrReg->getReg();
250  Offset = EltSize * Offset0;
251  return true;
252  }
253 
254  return false;
255  }
256 
257  if (isMUBUF(Opc) || isMTBUF(Opc)) {
258  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
259  return false;
260 
261  const MachineOperand *AddrReg = getNamedOperand(*LdSt,
262  AMDGPU::OpName::vaddr);
263  if (!AddrReg)
264  return false;
265 
266  const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
267  AMDGPU::OpName::offset);
268  BaseReg = AddrReg->getReg();
269  Offset = OffsetImm->getImm();
270  return true;
271  }
272 
273  if (isSMRD(Opc)) {
274  const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
275  AMDGPU::OpName::offset);
276  if (!OffsetImm)
277  return false;
278 
279  const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
280  AMDGPU::OpName::sbase);
281  BaseReg = SBaseReg->getReg();
282  Offset = OffsetImm->getImm();
283  return true;
284  }
285 
286  return false;
287 }
288 
289 bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
290  MachineInstr *SecondLdSt,
291  unsigned NumLoads) const {
292  unsigned Opc0 = FirstLdSt->getOpcode();
293  unsigned Opc1 = SecondLdSt->getOpcode();
294 
295  // TODO: This needs finer tuning
296  if (NumLoads > 4)
297  return false;
298 
299  if (isDS(Opc0) && isDS(Opc1))
300  return true;
301 
302  if (isSMRD(Opc0) && isSMRD(Opc1))
303  return true;
304 
305  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
306  return true;
307 
308  return false;
309 }
310 
311 void
312 SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
313  MachineBasicBlock::iterator MI, DebugLoc DL,
314  unsigned DestReg, unsigned SrcReg,
315  bool KillSrc) const {
316 
317  // If we are trying to copy to or from SCC, there is a bug somewhere else in
318  // the backend. While it may be theoretically possible to do this, it should
319  // never be necessary.
320  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
321 
322  static const int16_t Sub0_15[] = {
323  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
324  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
325  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
326  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
327  };
328 
329  static const int16_t Sub0_7[] = {
330  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
331  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
332  };
333 
334  static const int16_t Sub0_3[] = {
335  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
336  };
337 
338  static const int16_t Sub0_2[] = {
339  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
340  };
341 
342  static const int16_t Sub0_1[] = {
343  AMDGPU::sub0, AMDGPU::sub1, 0
344  };
345 
346  unsigned Opcode;
347  const int16_t *SubIndices;
348 
349  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
350  assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
351  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
352  .addReg(SrcReg, getKillRegState(KillSrc));
353  return;
354 
355  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
356  if (DestReg == AMDGPU::VCC) {
357  if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
358  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
359  .addReg(SrcReg, getKillRegState(KillSrc));
360  } else {
361  // FIXME: Hack until VReg_1 removed.
362  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
363  BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
364  .addImm(0)
365  .addReg(SrcReg, getKillRegState(KillSrc));
366  }
367 
368  return;
369  }
370 
371  assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
372  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
373  .addReg(SrcReg, getKillRegState(KillSrc));
374  return;
375 
376  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
377  assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
378  Opcode = AMDGPU::S_MOV_B32;
379  SubIndices = Sub0_3;
380 
381  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
382  assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
383  Opcode = AMDGPU::S_MOV_B32;
384  SubIndices = Sub0_7;
385 
386  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
387  assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
388  Opcode = AMDGPU::S_MOV_B32;
389  SubIndices = Sub0_15;
390 
391  } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
392  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
393  AMDGPU::SReg_32RegClass.contains(SrcReg));
394  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
395  .addReg(SrcReg, getKillRegState(KillSrc));
396  return;
397 
398  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
399  assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
400  AMDGPU::SReg_64RegClass.contains(SrcReg));
401  Opcode = AMDGPU::V_MOV_B32_e32;
402  SubIndices = Sub0_1;
403 
404  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
405  assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
406  Opcode = AMDGPU::V_MOV_B32_e32;
407  SubIndices = Sub0_2;
408 
409  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
410  assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
411  AMDGPU::SReg_128RegClass.contains(SrcReg));
412  Opcode = AMDGPU::V_MOV_B32_e32;
413  SubIndices = Sub0_3;
414 
415  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
416  assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
417  AMDGPU::SReg_256RegClass.contains(SrcReg));
418  Opcode = AMDGPU::V_MOV_B32_e32;
419  SubIndices = Sub0_7;
420 
421  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
422  assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
423  AMDGPU::SReg_512RegClass.contains(SrcReg));
424  Opcode = AMDGPU::V_MOV_B32_e32;
425  SubIndices = Sub0_15;
426 
427  } else {
428  llvm_unreachable("Can't copy register!");
429  }
430 
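  // Emit one 32-bit move per sub-register. Every move except the last also
  // implicitly defines the full destination register so the partial copies
  // are not treated as dead.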
431  while (unsigned SubIdx = *SubIndices++) {
432  MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
433  get(Opcode), RI.getSubReg(DestReg, SubIdx));
434 
435  Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
436 
437  if (*SubIndices)
438  Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
439  }
440 }
441 
442 int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
443  const unsigned Opcode = MI.getOpcode();
444 
445  int NewOpc;
446 
447  // Try to map original to commuted opcode
448  NewOpc = AMDGPU::getCommuteRev(Opcode);
449  if (NewOpc != -1)
450  // Check if the commuted (REV) opcode exists on the target.
451  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
452 
453  // Try to map commuted to original opcode
454  NewOpc = AMDGPU::getCommuteOrig(Opcode);
455  if (NewOpc != -1)
456  // Check if the original (non-REV) opcode exists on the target.
457  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
458 
459  return Opcode;
460 }
461 
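// Pick a move opcode appropriate for the size and register bank of the
// destination class; destinations wider than 64 bits fall back to a generic
// COPY.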
462 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
463 
464  if (DstRC->getSize() == 4) {
465  return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
466  } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
467  return AMDGPU::S_MOV_B64;
468  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
469  return AMDGPU::V_MOV_B64_PSEUDO;
470  }
471  return AMDGPU::COPY;
472 }
473 
474 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
475  MachineBasicBlock::iterator MI,
476  unsigned SrcReg, bool isKill,
477  int FrameIndex,
478  const TargetRegisterClass *RC,
479  const TargetRegisterInfo *TRI) const {
480  MachineFunction *MF = MBB.getParent();
481  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
482  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
483  DebugLoc DL = MBB.findDebugLoc(MI);
484  int Opcode = -1;
485 
486  if (RI.isSGPRClass(RC)) {
487  // We are only allowed to create one new instruction when spilling
488  // registers, so we need to use a pseudo instruction for spilling
489  // SGPRs.
490  switch (RC->getSize() * 8) {
491  case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
492  case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
493  case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
494  case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
495  case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
496  }
497  } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
498  MFI->setHasSpilledVGPRs();
499 
500  switch(RC->getSize() * 8) {
501  case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
502  case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
503  case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
504  case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
505  case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
506  case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
507  }
508  }
509 
510  if (Opcode != -1) {
511  FrameInfo->setObjectAlignment(FrameIndex, 4);
512  BuildMI(MBB, MI, DL, get(Opcode))
513  .addReg(SrcReg)
514  .addFrameIndex(FrameIndex)
515  // Place-holder registers, these will be filled in by
516  // SIPrepareScratchRegs.
517  .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
518  .addReg(AMDGPU::SGPR0, RegState::Undef);
519  } else {
520  LLVMContext &Ctx = MF->getFunction()->getContext();
521  Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
522  " spill register");
523  BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
524  .addReg(SrcReg);
525  }
526 }
527 
528 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
529  MachineBasicBlock::iterator MI,
530  unsigned DestReg, int FrameIndex,
531  const TargetRegisterClass *RC,
532  const TargetRegisterInfo *TRI) const {
533  MachineFunction *MF = MBB.getParent();
534  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
535  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
536  DebugLoc DL = MBB.findDebugLoc(MI);
537  int Opcode = -1;
538 
539  if (RI.isSGPRClass(RC)){
540  switch(RC->getSize() * 8) {
541  case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
542  case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
543  case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
544  case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
545  case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
546  }
547  } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
548  switch(RC->getSize() * 8) {
549  case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
550  case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
551  case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
552  case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
553  case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
554  case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
555  }
556  }
557 
558  if (Opcode != -1) {
559  FrameInfo->setObjectAlignment(FrameIndex, 4);
560  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
561  .addFrameIndex(FrameIndex)
562  // Place-holder registers, these will be filled in by
563  // SIPrepareScratchRegs.
564  .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
565  .addReg(AMDGPU::SGPR0, RegState::Undef);
566 
567  } else {
568  LLVMContext &Ctx = MF->getFunction()->getContext();
569  Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
570  " restore register");
571  BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
572  }
573 }
574 
575 /// \param FrameOffset Offset in bytes of the FrameIndex being spilled
576 unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
577  MachineInstr *MI,
578  RegScavenger *RS, unsigned TmpReg,
579  unsigned FrameOffset,
580  unsigned Size) const {
581  MachineFunction *MF = MBB.getParent();
582  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
584  const SIRegisterInfo *TRI =
585  static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
586  DebugLoc DL = MBB.findDebugLoc(MI);
587  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
588  unsigned WavefrontSize = ST.getWavefrontSize();
589 
590  unsigned TIDReg = MFI->getTIDReg();
591  if (!MFI->hasCalculatedTID()) {
592  MachineBasicBlock &Entry = MBB.getParent()->front();
593  MachineBasicBlock::iterator Insert = Entry.front();
594  DebugLoc DL = Insert->getDebugLoc();
595 
596  TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
597  if (TIDReg == AMDGPU::NoRegister)
598  return TIDReg;
599 
600 
601  if (MFI->getShaderType() == ShaderType::COMPUTE &&
602  WorkGroupSize > WavefrontSize) {
603 
604  unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
605  unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
606  unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
607  unsigned InputPtrReg =
608  TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
609  for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
610  if (!Entry.isLiveIn(Reg))
611  Entry.addLiveIn(Reg);
612  }
613 
614  RS->enterBasicBlock(&Entry);
615  unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
616  unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
617  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
618  .addReg(InputPtrReg)
620  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
621  .addReg(InputPtrReg)
623 
624  // NGROUPS.X * NGROUPS.Y
625  BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
626  .addReg(STmp1)
627  .addReg(STmp0);
628  // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
629  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
630  .addReg(STmp1)
631  .addReg(TIDIGXReg);
632  // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
633  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
634  .addReg(STmp0)
635  .addReg(TIDIGYReg)
636  .addReg(TIDReg);
637  // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
638  BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
639  .addReg(TIDReg)
640  .addReg(TIDIGZReg);
641  } else {
642  // Get the wave id
643  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
644  TIDReg)
645  .addImm(-1)
646  .addImm(0);
647 
648  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
649  TIDReg)
650  .addImm(-1)
651  .addReg(TIDReg);
652  }
653 
654  BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
655  TIDReg)
656  .addImm(2)
657  .addReg(TIDReg);
658  MFI->setTIDReg(TIDReg);
659  }
660 
661  // Add FrameIndex to LDS offset
662  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
663  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
664  .addImm(LDSOffset)
665  .addReg(TIDReg);
666 
667  return TmpReg;
668 }
669 
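// Each S_NOP immediate N inserts N+1 wait states, up to 8 per instruction,
// so emit a short run of S_NOPs that covers the requested count.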
670 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
671  int Count) const {
672  while (Count > 0) {
673  int Arg;
674  if (Count >= 8)
675  Arg = 7;
676  else
677  Arg = Count - 1;
678  Count -= 8;
679  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
680  .addImm(Arg);
681  }
682 }
683 
684 bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
685  MachineBasicBlock &MBB = *MI->getParent();
686  DebugLoc DL = MBB.findDebugLoc(MI);
687  switch (MI->getOpcode()) {
688  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
689 
690  case AMDGPU::SI_CONSTDATA_PTR: {
691  unsigned Reg = MI->getOperand(0).getReg();
692  unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
693  unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
694 
695  BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
696 
697  // Add 32-bit offset from this instruction to the start of the constant data.
698  BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
699  .addReg(RegLo)
700  .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
701  .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
702  BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
703  .addReg(RegHi)
704  .addImm(0)
705  .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
706  .addReg(AMDGPU::SCC, RegState::Implicit);
707  MI->eraseFromParent();
708  break;
709  }
710  case AMDGPU::SGPR_USE:
711  // This is just a placeholder for register allocation.
712  MI->eraseFromParent();
713  break;
714 
715  case AMDGPU::V_MOV_B64_PSEUDO: {
716  unsigned Dst = MI->getOperand(0).getReg();
717  unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
718  unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
719 
720  const MachineOperand &SrcOp = MI->getOperand(1);
721  // FIXME: Will this work for 64-bit floating point immediates?
722  assert(!SrcOp.isFPImm());
723  if (SrcOp.isImm()) {
724  APInt Imm(64, SrcOp.getImm());
725  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
726  .addImm(Imm.getLoBits(32).getZExtValue())
727  .addReg(Dst, RegState::Implicit);
728  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
729  .addImm(Imm.getHiBits(32).getZExtValue())
730  .addReg(Dst, RegState::Implicit);
731  } else {
732  assert(SrcOp.isReg());
733  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
734  .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
735  .addReg(Dst, RegState::Implicit);
736  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
737  .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
738  .addReg(Dst, RegState::Implicit);
739  }
740  MI->eraseFromParent();
741  break;
742  }
743 
744  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
745  unsigned Dst = MI->getOperand(0).getReg();
746  unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
747  unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
748  unsigned Src0 = MI->getOperand(1).getReg();
749  unsigned Src1 = MI->getOperand(2).getReg();
750  const MachineOperand &SrcCond = MI->getOperand(3);
751 
752  BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
753  .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
754  .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
755  .addOperand(SrcCond);
756  BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
757  .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
758  .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
759  .addOperand(SrcCond);
760  MI->eraseFromParent();
761  break;
762  }
763  }
764  return true;
765 }
766 
767 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
768  bool NewMI) const {
769 
770  if (MI->getNumOperands() < 3)
771  return nullptr;
772 
773  int CommutedOpcode = commuteOpcode(*MI);
774  if (CommutedOpcode == -1)
775  return nullptr;
776 
777  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
778  AMDGPU::OpName::src0);
779  assert(Src0Idx != -1 && "Should always have src0 operand");
780 
781  MachineOperand &Src0 = MI->getOperand(Src0Idx);
782  if (!Src0.isReg())
783  return nullptr;
784 
785  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
786  AMDGPU::OpName::src1);
787  if (Src1Idx == -1)
788  return nullptr;
789 
790  MachineOperand &Src1 = MI->getOperand(Src1Idx);
791 
792  // Make sure it's legal to commute operands for VOP2.
793  if (isVOP2(MI->getOpcode()) &&
794  (!isOperandLegal(MI, Src0Idx, &Src1) ||
795  !isOperandLegal(MI, Src1Idx, &Src0))) {
796  return nullptr;
797  }
798 
799  if (!Src1.isReg()) {
800  // Allow commuting instructions with Imm operands.
801  if (NewMI || !Src1.isImm() ||
802  (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
803  return nullptr;
804  }
805 
806  // Be sure to copy the source modifiers to the right place.
807  if (MachineOperand *Src0Mods
808  = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
809  MachineOperand *Src1Mods
810  = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
811 
812  int Src0ModsVal = Src0Mods->getImm();
813  if (!Src1Mods && Src0ModsVal != 0)
814  return nullptr;
815 
816  // XXX - This assert might be a lie. It might be useful to have a neg
817  // modifier with 0.0.
818  int Src1ModsVal = Src1Mods->getImm();
819  assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
820 
821  Src1Mods->setImm(Src0ModsVal);
822  Src0Mods->setImm(Src1ModsVal);
823  }
824 
825  unsigned Reg = Src0.getReg();
826  unsigned SubReg = Src0.getSubReg();
827  if (Src1.isImm())
828  Src0.ChangeToImmediate(Src1.getImm());
829  else
830  llvm_unreachable("Should only have immediates");
831 
832  Src1.ChangeToRegister(Reg, false);
833  Src1.setSubReg(SubReg);
834  } else {
835  MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
836  }
837 
838  if (MI)
839  MI->setDesc(get(CommutedOpcode));
840 
841  return MI;
842 }
843 
844 // This needs to be implemented because the source modifiers may be inserted
845 // between the true commutable operands, and the base
846 // TargetInstrInfo::commuteInstruction uses it.
847 bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
848  unsigned &SrcOpIdx1,
849  unsigned &SrcOpIdx2) const {
850  const MCInstrDesc &MCID = MI->getDesc();
851  if (!MCID.isCommutable())
852  return false;
853 
854  unsigned Opc = MI->getOpcode();
855  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
856  if (Src0Idx == -1)
857  return false;
858 
859  // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
860  // immediate.
861  if (!MI->getOperand(Src0Idx).isReg())
862  return false;
863 
864  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
865  if (Src1Idx == -1)
866  return false;
867 
868  if (!MI->getOperand(Src1Idx).isReg())
869  return false;
870 
871  // If any source modifiers are set, the generic instruction commuting won't
872  // understand how to copy the source modifiers.
873  if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
874  hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
875  return false;
876 
877  SrcOpIdx1 = Src0Idx;
878  SrcOpIdx2 = Src1Idx;
879  return true;
880 }
881 
882 MachineInstrBuilder SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
883  MachineBasicBlock::iterator I,
884  unsigned DstReg,
885  unsigned SrcReg) const {
886  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
887  DstReg) .addReg(SrcReg);
888 }
889 
890 bool SIInstrInfo::isMov(unsigned Opcode) const {
891  switch(Opcode) {
892  default: return false;
893  case AMDGPU::S_MOV_B32:
894  case AMDGPU::S_MOV_B64:
895  case AMDGPU::V_MOV_B32_e32:
896  case AMDGPU::V_MOV_B32_e64:
897  return true;
898  }
899 }
900 
901 bool
902 SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
903  return RC != &AMDGPU::EXECRegRegClass;
904 }
905 
906 void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
907  unsigned Opc = MI.getOpcode();
908  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
909  AMDGPU::OpName::src0_modifiers);
910  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
911  AMDGPU::OpName::src1_modifiers);
912  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
913  AMDGPU::OpName::src2_modifiers);
914 
915  MI.RemoveOperand(Src2ModIdx);
916  MI.RemoveOperand(Src1ModIdx);
917  MI.RemoveOperand(Src0ModIdx);
918 }
919 
920 bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
921  unsigned Reg, MachineRegisterInfo *MRI) const {
922  if (!MRI->hasOneNonDBGUse(Reg))
923  return false;
924 
925  unsigned Opc = UseMI->getOpcode();
926  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
927  // Don't fold if we are using source modifiers. The new VOP2 instructions
928  // don't have them.
929  if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
930  hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
931  hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
932  return false;
933  }
934 
935  MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
936  MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
937  MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
938 
939  // Multiplied part is the constant: Use v_madmk_f32
940  // We should only expect these to be on src0 due to canonicalizations.
941  if (Src0->isReg() && Src0->getReg() == Reg) {
942  if (!Src1->isReg() ||
943  (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
944  return false;
945 
946  if (!Src2->isReg() ||
947  (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
948  return false;
949 
950  // We need to do some weird looking operand shuffling since the madmk
951  // operands are out of the normal expected order with the multiplied
952  // constant as the last operand.
953  //
954  // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
955  // src0 -> src2 K
956  // src1 -> src0
957  // src2 -> src1
958 
959  const int64_t Imm = DefMI->getOperand(1).getImm();
960 
961  // FIXME: This would be a lot easier if we could return a new instruction
962  // instead of having to modify in place.
963 
964  // Remove these first since they are at the end.
965  UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
966  AMDGPU::OpName::omod));
967  UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
968  AMDGPU::OpName::clamp));
969 
970  unsigned Src1Reg = Src1->getReg();
971  unsigned Src1SubReg = Src1->getSubReg();
972  unsigned Src2Reg = Src2->getReg();
973  unsigned Src2SubReg = Src2->getSubReg();
974  Src0->setReg(Src1Reg);
975  Src0->setSubReg(Src1SubReg);
976  Src0->setIsKill(Src1->isKill());
977 
978  Src1->setReg(Src2Reg);
979  Src1->setSubReg(Src2SubReg);
980  Src1->setIsKill(Src2->isKill());
981 
982  if (Opc == AMDGPU::V_MAC_F32_e64) {
983  UseMI->untieRegOperand(
984  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
985  }
986 
987  UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
988  AMDGPU::OpName::src2));
989  // ChangingToImmediate adds Src2 back to the instruction.
990  Src2->ChangeToImmediate(Imm);
991 
992  removeModOperands(*UseMI);
993  UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
994 
995  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
996  if (DeleteDef)
997  DefMI->eraseFromParent();
998 
999  return true;
1000  }
1001 
1002  // Added part is the constant: Use v_madak_f32
1003  if (Src2->isReg() && Src2->getReg() == Reg) {
1004  // Not allowed to use constant bus for another operand.
1005  // We can however allow an inline immediate as src0.
1006  if (!Src0->isImm() &&
1007  (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
1008  return false;
1009 
1010  if (!Src1->isReg() ||
1011  (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
1012  return false;
1013 
1014  const int64_t Imm = DefMI->getOperand(1).getImm();
1015 
1016  // FIXME: This would be a lot easier if we could return a new instruction
1017  // instead of having to modify in place.
1018 
1019  // Remove these first since they are at the end.
1020  UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
1021  AMDGPU::OpName::omod));
1022  UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
1023  AMDGPU::OpName::clamp));
1024 
1025  if (Opc == AMDGPU::V_MAC_F32_e64) {
1026  UseMI->untieRegOperand(
1027  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
1028  }
1029 
1030  // ChangingToImmediate adds Src2 back to the instruction.
1031  Src2->ChangeToImmediate(Imm);
1032 
1033  // These come before src2.
1034  removeModOperands(*UseMI);
1035  UseMI->setDesc(get(AMDGPU::V_MADAK_F32));
1036 
1037  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
1038  if (DeleteDef)
1039  DefMI->eraseFromParent();
1040 
1041  return true;
1042  }
1043  }
1044 
1045  return false;
1046 }
1047 
1048 bool
1049 SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
1050  AliasAnalysis *AA) const {
1051  switch(MI->getOpcode()) {
1052  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
1053  case AMDGPU::S_MOV_B32:
1054  case AMDGPU::S_MOV_B64:
1055  case AMDGPU::V_MOV_B32_e32:
1056  return MI->getOperand(1).isImm();
1057  }
1058 }
1059 
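// Two accesses with constant offsets from the same base are disjoint when
// the lower access ends at or before the higher one begins.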
1060 static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
1061  int WidthB, int OffsetB) {
1062  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1063  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1064  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1065  return LowOffset + LowWidth <= HighOffset;
1066 }
1067 
1068 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
1069  MachineInstr *MIb) const {
1070  unsigned BaseReg0, Offset0;
1071  unsigned BaseReg1, Offset1;
1072 
1073  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
1074  getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
1075  assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
1076  "read2 / write2 not expected here yet");
1077  unsigned Width0 = (*MIa->memoperands_begin())->getSize();
1078  unsigned Width1 = (*MIb->memoperands_begin())->getSize();
1079  if (BaseReg0 == BaseReg1 &&
1080  offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
1081  return true;
1082  }
1083  }
1084 
1085  return false;
1086 }
1087 
1088 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
1089  MachineInstr *MIb,
1090  AliasAnalysis *AA) const {
1091  unsigned Opc0 = MIa->getOpcode();
1092  unsigned Opc1 = MIb->getOpcode();
1093 
1094  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
1095  "MIa must load from or modify a memory location");
1096  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
1097  "MIb must load from or modify a memory location");
1098 
1099  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
1100  return false;
1101 
1102  // XXX - Can we relax this between address spaces?
1103  if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
1104  return false;
1105 
1106  // TODO: Should we check the address space from the MachineMemOperand? That
1107  // would allow us to distinguish objects we know don't alias based on the
1108  // underlying address space, even if it was lowered to a different one,
1109  // e.g. private accesses lowered to use MUBUF instructions on a scratch
1110  // buffer.
1111  if (isDS(Opc0)) {
1112  if (isDS(Opc1))
1113  return checkInstOffsetsDoNotOverlap(MIa, MIb);
1114 
1115  return !isFLAT(Opc1);
1116  }
1117 
1118  if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
1119  if (isMUBUF(Opc1) || isMTBUF(Opc1))
1120  return checkInstOffsetsDoNotOverlap(MIa, MIb);
1121 
1122  return !isFLAT(Opc1) && !isSMRD(Opc1);
1123  }
1124 
1125  if (isSMRD(Opc0)) {
1126  if (isSMRD(Opc1))
1127  return checkInstOffsetsDoNotOverlap(MIa, MIb);
1128 
1129  return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0);
1130  }
1131 
1132  if (isFLAT(Opc0)) {
1133  if (isFLAT(Opc1))
1134  return checkInstOffsetsDoNotOverlap(MIa, MIb);
1135 
1136  return false;
1137  }
1138 
1139  return false;
1140 }
1141 
1142 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
1143  MachineBasicBlock::iterator &MI,
1144  LiveVariables *LV) const {
1145 
1146  switch (MI->getOpcode()) {
1147  default: return nullptr;
1148  case AMDGPU::V_MAC_F32_e64: break;
1149  case AMDGPU::V_MAC_F32_e32: {
1150  const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
1151  if (Src0->isImm() && !isInlineConstant(*Src0, 4))
1152  return nullptr;
1153  break;
1154  }
1155  }
1156 
1157  const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst);
1158  const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
1159  const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
1160  const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);
1161 
1162  return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
1163  .addOperand(*Dst)
1164  .addImm(0) // Src0 mods
1165  .addOperand(*Src0)
1166  .addImm(0) // Src1 mods
1167  .addOperand(*Src1)
1168  .addImm(0) // Src2 mods
1169  .addOperand(*Src2)
1170  .addImm(0) // clamp
1171  .addImm(0); // omod
1172 }
1173 
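// SI inline constants are the integers -16..64 and a handful of
// floating-point values (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0), matched here
// purely by bit pattern.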
1174 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
1175  int64_t SVal = Imm.getSExtValue();
1176  if (SVal >= -16 && SVal <= 64)
1177  return true;
1178 
1179  if (Imm.getBitWidth() == 64) {
1180  uint64_t Val = Imm.getZExtValue();
1181  return (DoubleToBits(0.0) == Val) ||
1182  (DoubleToBits(1.0) == Val) ||
1183  (DoubleToBits(-1.0) == Val) ||
1184  (DoubleToBits(0.5) == Val) ||
1185  (DoubleToBits(-0.5) == Val) ||
1186  (DoubleToBits(2.0) == Val) ||
1187  (DoubleToBits(-2.0) == Val) ||
1188  (DoubleToBits(4.0) == Val) ||
1189  (DoubleToBits(-4.0) == Val);
1190  }
1191 
1192  // The actual type of the operand does not seem to matter as long
1193  // as the bits match one of the inline immediate values. For example:
1194  //
1195  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1196  // so it is a legal inline immediate.
1197  //
1198  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1199  // floating-point, so it is a legal inline immediate.
1200  uint32_t Val = Imm.getZExtValue();
1201 
1202  return (FloatToBits(0.0f) == Val) ||
1203  (FloatToBits(1.0f) == Val) ||
1204  (FloatToBits(-1.0f) == Val) ||
1205  (FloatToBits(0.5f) == Val) ||
1206  (FloatToBits(-0.5f) == Val) ||
1207  (FloatToBits(2.0f) == Val) ||
1208  (FloatToBits(-2.0f) == Val) ||
1209  (FloatToBits(4.0f) == Val) ||
1210  (FloatToBits(-4.0f) == Val);
1211 }
1212 
1213 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
1214  unsigned OpSize) const {
1215  if (MO.isImm()) {
1216  // MachineOperand provides no way to tell the true operand size, since it
1217  // only records a 64-bit value. We need to know the size to determine if a
1218  // 32-bit floating point immediate bit pattern is legal for an integer
1219  // immediate. It would be for any 32-bit integer operand, but would not be
1220  // for a 64-bit one.
1221 
1222  unsigned BitSize = 8 * OpSize;
1223  return isInlineConstant(APInt(BitSize, MO.getImm(), true));
1224  }
1225 
1226  return false;
1227 }
1228 
1229 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
1230  unsigned OpSize) const {
1231  return MO.isImm() && !isInlineConstant(MO, OpSize);
1232 }
1233 
1234 static bool compareMachineOp(const MachineOperand &Op0,
1235  const MachineOperand &Op1) {
1236  if (Op0.getType() != Op1.getType())
1237  return false;
1238 
1239  switch (Op0.getType()) {
1240  case MachineOperand::MO_Register:
1241  return Op0.getReg() == Op1.getReg();
1242  case MachineOperand::MO_Immediate:
1243  return Op0.getImm() == Op1.getImm();
1244  default:
1245  llvm_unreachable("Didn't expect to be comparing these operand types");
1246  }
1247 }
1248 
1249 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
1250  const MachineOperand &MO) const {
1251  const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
1252 
1253  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
1254 
1255  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
1256  return true;
1257 
1258  if (OpInfo.RegClass < 0)
1259  return false;
1260 
1261  unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
1262  if (isLiteralConstant(MO, OpSize))
1263  return RI.opCanUseLiteralConstant(OpInfo.OperandType);
1264 
1265  return RI.opCanUseInlineConstant(OpInfo.OperandType);
1266 }
1267 
1268 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
1269  int Op32 = AMDGPU::getVOPe32(Opcode);
1270  if (Op32 == -1)
1271  return false;
1272 
1273  return pseudoToMCOpcode(Op32) != -1;
1274 }
1275 
1276 bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
1277  // The src0_modifier operand is present on all instructions
1278  // that have modifiers.
1279 
1280  return AMDGPU::getNamedOperandIdx(Opcode,
1281  AMDGPU::OpName::src0_modifiers) != -1;
1282 }
1283 
1284 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
1285  unsigned OpName) const {
1286  const MachineOperand *Mods = getNamedOperand(MI, OpName);
1287  return Mods && Mods->getImm();
1288 }
1289 
1290 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
1291  const MachineOperand &MO,
1292  unsigned OpSize) const {
1293  // Literal constants use the constant bus.
1294  if (isLiteralConstant(MO, OpSize))
1295  return true;
1296 
1297  if (!MO.isReg() || !MO.isUse())
1298  return false;
1299 
1300  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
1301  return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
1302 
1303  // FLAT_SCR is just an SGPR pair.
1304  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
1305  return true;
1306 
1307  // EXEC register uses the constant bus.
1308  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
1309  return true;
1310 
1311  // SGPRs use the constant bus
1312  if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
1313  (!MO.isImplicit() &&
1314  (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
1315  AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
1316  return true;
1317  }
1318 
1319  return false;
1320 }
1321 
1322 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
1323  StringRef &ErrInfo) const {
1324  uint16_t Opcode = MI->getOpcode();
1325  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1326  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
1327  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
1328  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
1329 
1330  // Make sure the number of operands is correct.
1331  const MCInstrDesc &Desc = get(Opcode);
1332  if (!Desc.isVariadic() &&
1333  Desc.getNumOperands() != MI->getNumExplicitOperands()) {
1334  ErrInfo = "Instruction has wrong number of operands.";
1335  return false;
1336  }
1337 
1338  // Make sure the register classes are correct
1339  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
1340  if (MI->getOperand(i).isFPImm()) {
1341  ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
1342  "all fp values to integers.";
1343  return false;
1344  }
1345 
1346  int RegClass = Desc.OpInfo[i].RegClass;
1347 
1348  switch (Desc.OpInfo[i].OperandType) {
1349  case MCOI::OPERAND_REGISTER:
1350  if (MI->getOperand(i).isImm()) {
1351  ErrInfo = "Illegal immediate value for operand.";
1352  return false;
1353  }
1354  break;
1355  case AMDGPU::OPERAND_REG_IMM32:
1356  break;
1357  case AMDGPU::OPERAND_REG_INLINE_C:
1358  if (isLiteralConstant(MI->getOperand(i),
1359  RI.getRegClass(RegClass)->getSize())) {
1360  ErrInfo = "Illegal immediate value for operand.";
1361  return false;
1362  }
1363  break;
1364  case MCOI::OPERAND_IMMEDIATE:
1365  // Check if this operand is an immediate.
1366  // FrameIndex operands will be replaced by immediates, so they are
1367  // allowed.
1368  if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
1369  ErrInfo = "Expected immediate, but got non-immediate";
1370  return false;
1371  }
1372  // Fall-through
1373  default:
1374  continue;
1375  }
1376 
1377  if (!MI->getOperand(i).isReg())
1378  continue;
1379 
1380  if (RegClass != -1) {
1381  unsigned Reg = MI->getOperand(i).getReg();
1382  if (TargetRegisterInfo::isVirtualRegister(Reg))
1383  continue;
1384 
1385  const TargetRegisterClass *RC = RI.getRegClass(RegClass);
1386  if (!RC->contains(Reg)) {
1387  ErrInfo = "Operand has incorrect register class.";
1388  return false;
1389  }
1390  }
1391  }
1392 
1393 
1394  // Verify VOP*
1395  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
1396  // Only look at the true operands. Only a real operand can use the constant
1397  // bus, and we don't want to check pseudo-operands like the source modifier
1398  // flags.
1399  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
1400 
1401  unsigned ConstantBusCount = 0;
1402  unsigned SGPRUsed = AMDGPU::NoRegister;
1403  for (int OpIdx : OpIndices) {
1404  if (OpIdx == -1)
1405  break;
1406  const MachineOperand &MO = MI->getOperand(OpIdx);
1407  if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
1408  if (MO.isReg()) {
1409  if (MO.getReg() != SGPRUsed)
1410  ++ConstantBusCount;
1411  SGPRUsed = MO.getReg();
1412  } else {
1413  ++ConstantBusCount;
1414  }
1415  }
1416  }
1417  if (ConstantBusCount > 1) {
1418  ErrInfo = "VOP* instruction uses the constant bus more than once";
1419  return false;
1420  }
1421  }
1422 
1423  // Verify misc. restrictions on specific instructions.
1424  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
1425  Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
1426  const MachineOperand &Src0 = MI->getOperand(Src0Idx);
1427  const MachineOperand &Src1 = MI->getOperand(Src1Idx);
1428  const MachineOperand &Src2 = MI->getOperand(Src2Idx);
1429  if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
1430  if (!compareMachineOp(Src0, Src1) &&
1431  !compareMachineOp(Src0, Src2)) {
1432  ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
1433  return false;
1434  }
1435  }
1436  }
1437 
1438  return true;
1439 }
1440 
1441 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
1442  switch (MI.getOpcode()) {
1443  default: return AMDGPU::INSTRUCTION_LIST_END;
1444  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
1445  case AMDGPU::COPY: return AMDGPU::COPY;
1446  case AMDGPU::PHI: return AMDGPU::PHI;
1447  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
1448  case AMDGPU::S_MOV_B32:
1449  return MI.getOperand(1).isReg() ?
1450  AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
1451  case AMDGPU::S_ADD_I32:
1452  case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
1453  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
1454  case AMDGPU::S_SUB_I32:
1455  case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
1456  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
1457  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
1458  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
1459  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
1460  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
1461  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
1462  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
1463  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
1464  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
1465  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
1466  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
1467  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
1468  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
1469  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
1470  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
1471  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
1472  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
1473  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
1474  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
1475  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
1476  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
1477  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
1478  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
1479  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
1480  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
1481  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
1482  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
1483  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
1484  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
1485  case AMDGPU::S_LOAD_DWORD_IMM:
1486  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
1487  case AMDGPU::S_LOAD_DWORDX2_IMM:
1488  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
1489  case AMDGPU::S_LOAD_DWORDX4_IMM:
1490  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
1491  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
1492  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
1493  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
1494  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
1495  }
1496 }
1497 
1499  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
1500 }
1501 
1502 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
1503  unsigned OpNo) const {
1504  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1505  const MCInstrDesc &Desc = get(MI.getOpcode());
1506  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
1507  Desc.OpInfo[OpNo].RegClass == -1) {
1508  unsigned Reg = MI.getOperand(OpNo).getReg();
1509 
1510  if (TargetRegisterInfo::isVirtualRegister(Reg))
1511  return MRI.getRegClass(Reg);
1512  return RI.getPhysRegClass(Reg);
1513  }
1514 
1515  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1516  return RI.getRegClass(RCID);
1517 }
1518 
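// For copy-like pseudos, whether an operand may be a VGPR is determined by
// the result register class; for everything else, check the constraint on
// the operand itself.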
1519 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
1520  switch (MI.getOpcode()) {
1521  case AMDGPU::COPY:
1522  case AMDGPU::REG_SEQUENCE:
1523  case AMDGPU::PHI:
1524  case AMDGPU::INSERT_SUBREG:
1525  return RI.hasVGPRs(getOpRegClass(MI, 0));
1526  default:
1527  return RI.hasVGPRs(getOpRegClass(MI, OpNo));
1528  }
1529 }
1530 
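// Replace an operand the instruction cannot encode with a copy of it into a
// freshly created virtual register of a class the instruction can read, then
// rewrite the operand to use that register.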
1531 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
1532  MachineBasicBlock::iterator I = MI;
1533  MachineBasicBlock *MBB = MI->getParent();
1534  MachineOperand &MO = MI->getOperand(OpIdx);
1535  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1536  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
1537  const TargetRegisterClass *RC = RI.getRegClass(RCID);
1538  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1539  if (MO.isReg())
1540  Opcode = AMDGPU::COPY;
1541  else if (RI.isSGPRClass(RC))
1542  Opcode = AMDGPU::S_MOV_B32;
1543 
1544 
1545  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
1546  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
1547  VRC = &AMDGPU::VReg_64RegClass;
1548  else
1549  VRC = &AMDGPU::VGPR_32RegClass;
1550 
1551  unsigned Reg = MRI.createVirtualRegister(VRC);
1552  DebugLoc DL = MBB->findDebugLoc(I);
1553  BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
1554  .addOperand(MO);
1555  MO.ChangeToRegister(Reg, false);
1556 }
1557 
1558 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
1559  MachineRegisterInfo &MRI,
1560  MachineOperand &SuperReg,
1561  const TargetRegisterClass *SuperRC,
1562  unsigned SubIdx,
1563  const TargetRegisterClass *SubRC)
1564  const {
1565  assert(SuperReg.isReg());
1566 
1567  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
1568  unsigned SubReg = MRI.createVirtualRegister(SubRC);
1569 
1570  // Just in case the super register is itself a sub-register, copy it to a new
1571  // value so we don't need to worry about merging its subreg index with the
1572  // SubIdx passed to this function. The register coalescer should be able to
1573  // eliminate this extra copy.
1574  MachineBasicBlock *MBB = MI->getParent();
1575  DebugLoc DL = MI->getDebugLoc();
1576 
1577  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
1578  .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
1579 
1580  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1581  .addReg(NewSuperReg, 0, SubIdx);
1582 
1583  return SubReg;
1584 }
1585 
1586 MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
1587  MachineBasicBlock::iterator MII,
1588  MachineRegisterInfo &MRI,
1589  MachineOperand &Op,
1590  const TargetRegisterClass *SuperRC,
1591  unsigned SubIdx,
1592  const TargetRegisterClass *SubRC) const {
1593  if (Op.isImm()) {
1594  // XXX - Is there a better way to do this?
1595  if (SubIdx == AMDGPU::sub0)
1596  return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
1597  if (SubIdx == AMDGPU::sub1)
1598  return MachineOperand::CreateImm(Op.getImm() >> 32);
1599 
1600  llvm_unreachable("Unhandled register index for immediate");
1601  }
1602 
1603  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
1604  SubIdx, SubRC);
1605  return MachineOperand::CreateReg(SubReg, false);
1606 }
1607 
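// Materialize each 32-bit half of the immediate with S_MOV_B32 and stitch
// the halves back together with REG_SEQUENCE; the new moves are queued on
// the worklist for further legalization.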
1608 unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
1609  MachineBasicBlock::iterator MI,
1610  MachineRegisterInfo &MRI,
1611  const TargetRegisterClass *RC,
1612  const MachineOperand &Op) const {
1613  MachineBasicBlock *MBB = MI->getParent();
1614  DebugLoc DL = MI->getDebugLoc();
1615  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1616  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1617  unsigned Dst = MRI.createVirtualRegister(RC);
1618 
1619  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
1620  LoDst)
1621  .addImm(Op.getImm() & 0xFFFFFFFF);
1622  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
1623  HiDst)
1624  .addImm(Op.getImm() >> 32);
1625 
1626  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
1627  .addReg(LoDst)
1628  .addImm(AMDGPU::sub0)
1629  .addReg(HiDst)
1630  .addImm(AMDGPU::sub1);
1631 
1632  Worklist.push_back(Lo);
1633  Worklist.push_back(Hi);
1634 
1635  return Dst;
1636 }
1637 
1638 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
1639 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
1640  assert(Inst->getNumExplicitOperands() == 3);
1641  MachineOperand Op1 = Inst->getOperand(1);
1642  Inst->RemoveOperand(1);
1643  Inst->addOperand(Op1);
1644 }
1645 
1646 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
1647  const MachineOperand *MO) const {
1648  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1649  const MCInstrDesc &InstDesc = get(MI->getOpcode());
1650  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
1651  const TargetRegisterClass *DefinedRC =
1652  OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
1653  if (!MO)
1654  MO = &MI->getOperand(OpIdx);
1655 
1656  if (isVALU(InstDesc.Opcode) &&
1657  usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
1658  unsigned SGPRUsed =
1659  MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
1660  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1661  if (i == OpIdx)
1662  continue;
1663  const MachineOperand &Op = MI->getOperand(i);
1664  if (Op.isReg() && Op.getReg() != SGPRUsed &&
1665  usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
1666  return false;
1667  }
1668  }
1669  }
1670 
1671  if (MO->isReg()) {
1672  assert(DefinedRC);
1673  const TargetRegisterClass *RC =
1674  TargetRegisterInfo::isVirtualRegister(MO->getReg()) ?
1675  MRI.getRegClass(MO->getReg()) :
1676  RI.getPhysRegClass(MO->getReg());
1677 
1678  // In order to be legal, the common sub-class must be equal to the
1679  // class of the current operand. For example:
1680  //
1681  // v_mov_b32 s0 ; Operand defined as vsrc_32
1682  // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
1683  //
1684  // s_sendmsg 0, s0 ; Operand defined as m0reg
1685  // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
1686 
1687  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
1688  }
1689 
1690 
1691  // Handle non-register types that are treated like immediates.
1692  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
1693 
1694  if (!DefinedRC) {
1695  // This operand expects an immediate.
1696  return true;
1697  }
1698 
1699  return isImmOperandLegal(MI, OpIdx, *MO);
1700 }
1701 
1702 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
1703  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1704 
1705  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1706  AMDGPU::OpName::src0);
1707  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1708  AMDGPU::OpName::src1);
1709  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1710  AMDGPU::OpName::src2);
1711 
1712  // Legalize VOP2
1713  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
1714  // Legalize src0
1715  if (!isOperandLegal(MI, Src0Idx))
1716  legalizeOpWithMove(MI, Src0Idx);
1717 
1718  // Legalize src1
1719  if (isOperandLegal(MI, Src1Idx))
1720  return;
1721 
1722  // Usually src0 of VOP2 instructions allow more types of inputs
1723  // than src1, so try to commute the instruction to decrease our
1724  // chances of having to insert a MOV instruction to legalize src1.
1725  if (MI->isCommutable()) {
1726  if (commuteInstruction(MI))
1727  // If we are successful in commuting, then we know MI is legal, so
1728  // we are done.
1729  return;
1730  }
1731 
1732  legalizeOpWithMove(MI, Src1Idx);
1733  return;
1734  }
1735 
1736  // XXX - Do any VOP3 instructions read VCC?
1737  // Legalize VOP3
1738  if (isVOP3(MI->getOpcode())) {
1739  int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
1740 
1741  // Find the one SGPR operand we are allowed to use.
1742  unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
1743 
1744  for (unsigned i = 0; i < 3; ++i) {
1745  int Idx = VOP3Idx[i];
1746  if (Idx == -1)
1747  break;
1748  MachineOperand &MO = MI->getOperand(Idx);
1749 
1750  if (MO.isReg()) {
1751  if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
1752  continue; // VGPRs are legal
1753 
1754  assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
1755 
1756  if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
1757  SGPRReg = MO.getReg();
1758  // We can use one SGPR in each VOP3 instruction.
1759  continue;
1760  }
1761  } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
1762  // If it is not a register and not a literal constant, then it must be
1763  // an inline constant which is always legal.
1764  continue;
1765  }
1766  // If we make it this far, then the operand is not legal and we must
1767  // legalize it.
1768  legalizeOpWithMove(MI, Idx);
1769  }
1770  }
1771 
1772  // Legalize REG_SEQUENCE and PHI
1773  // The register class of the operands must be the same type as the register
1774  // class of the output.
1775  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
1776  MI->getOpcode() == AMDGPU::PHI) {
1777  const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
1778  for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
1779  if (!MI->getOperand(i).isReg() ||
1780  !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
1781  continue;
1782  const TargetRegisterClass *OpRC =
1783  MRI.getRegClass(MI->getOperand(i).getReg());
1784  if (RI.hasVGPRs(OpRC)) {
1785  VRC = OpRC;
1786  } else {
1787  SRC = OpRC;
1788  }
1789  }
1790 
1791  // If any of the operands are VGPR registers, then they all must be,
1792  // otherwise we will create illegal VGPR->SGPR copies when legalizing
1793  // them.
1794  if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
1795  if (!VRC) {
1796  assert(SRC);
1797  VRC = RI.getEquivalentVGPRClass(SRC);
1798  }
1799  RC = VRC;
1800  } else {
1801  RC = SRC;
1802  }
1803 
1804  // Update all the operands so they have the same type.
1805  for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
1806  if (!MI->getOperand(i).isReg() ||
1807  !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
1808  continue;
1809  unsigned DstReg = MRI.createVirtualRegister(RC);
1810  MachineBasicBlock *InsertBB;
1811  MachineBasicBlock::iterator Insert;
1812  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
1813  InsertBB = MI->getParent();
1814  Insert = MI;
1815  } else {
1816  // MI is a PHI instruction.
1817  InsertBB = MI->getOperand(i + 1).getMBB();
1818  Insert = InsertBB->getFirstTerminator();
1819  }
1820  BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
1821  get(AMDGPU::COPY), DstReg)
1822  .addOperand(MI->getOperand(i));
1823  MI->getOperand(i).setReg(DstReg);
1824  }
1825  }
1826 
1827  // Legalize INSERT_SUBREG
1828  // src0 must have the same register class as dst
1829  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
1830  unsigned Dst = MI->getOperand(0).getReg();
1831  unsigned Src0 = MI->getOperand(1).getReg();
1832  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
1833  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
1834  if (DstRC != Src0RC) {
1835  MachineBasicBlock &MBB = *MI->getParent();
1836  unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
1837  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
1838  .addReg(Src0);
1839  MI->getOperand(1).setReg(NewSrc0);
1840  }
1841  return;
1842  }
1843 
1844  // Legalize MUBUF* instructions
1845  // FIXME: If we start using the non-addr64 instructions for compute, we
1846  // may need to legalize them here.
1847  int SRsrcIdx =
1848  AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
1849  if (SRsrcIdx != -1) {
1850  // We have a MUBUF instruction
1851  MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
1852  unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
1853  if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
1854  RI.getRegClass(SRsrcRC))) {
1855  // The operands are legal.
1856  // FIXME: We may need to legalize operands besides srsrc.
1857  return;
1858  }
1859 
1860  MachineBasicBlock &MBB = *MI->getParent();
1861  // Extract the ptr from the resource descriptor.
1862 
1863  // SRsrcPtrLo = srsrc:sub0
1864  unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
1865  &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
1866 
1867  // SRsrcPtrHi = srsrc:sub1
1868  unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
1869  &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
1870 
1871  // Create an empty resource descriptor
1872  unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1873  unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1874  unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1875  unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
1876  uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
1877 
1878  // Zero64 = 0
1879  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
1880  Zero64)
1881  .addImm(0);
1882 
1883  // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
1884  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1885  SRsrcFormatLo)
1886  .addImm(RsrcDataFormat & 0xFFFFFFFF);
1887 
1888  // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
1889  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1890  SRsrcFormatHi)
1891  .addImm(RsrcDataFormat >> 32);
1892 
1893  // NewSRsrc = {Zero64, SRsrcFormat}
1894  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1895  NewSRsrc)
1896  .addReg(Zero64)
1897  .addImm(AMDGPU::sub0_sub1)
1898  .addReg(SRsrcFormatLo)
1899  .addImm(AMDGPU::sub2)
1900  .addReg(SRsrcFormatHi)
1901  .addImm(AMDGPU::sub3);
1902 
1903  MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
1904  unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
1905  unsigned NewVAddrLo;
1906  unsigned NewVAddrHi;
1907  if (VAddr) {
1908  // This is already an ADDR64 instruction so we need to add the pointer
1909  // extracted from the resource descriptor to the current value of VAddr.
1910  NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1911  NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1912 
1913  // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
1914  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
1915  NewVAddrLo)
1916  .addReg(SRsrcPtrLo)
1917  .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
1918  .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
1919 
1920  // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
1921  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
1922  NewVAddrHi)
1923  .addReg(SRsrcPtrHi)
1924  .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
1925  .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
1926  .addReg(AMDGPU::VCC, RegState::Implicit);
1927 
1928  } else {
1929  // This instruction is the _OFFSET variant, so we need to convert it to
1930  // ADDR64.
1931  MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
1932  MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
1933  MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
1934 
1935  // Create the new instruction.
1936  unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
1937  MachineInstr *Addr64 =
1938  BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
1939  .addOperand(*VData)
1940  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
1941  // This will be replaced later
1942  // with the new value of vaddr.
1943  .addOperand(*SRsrc)
1944  .addOperand(*SOffset)
1945  .addOperand(*Offset)
1946  .addImm(0) // glc
1947  .addImm(0) // slc
1948  .addImm(0); // tfe
1949 
1950  MI->removeFromParent();
1951  MI = Addr64;
1952 
1953  NewVAddrLo = SRsrcPtrLo;
1954  NewVAddrHi = SRsrcPtrHi;
1955  VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
1956  SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
1957  }
1958 
1959  // NewVaddr = {NewVAddrLo, NewVAddrHi}
1960  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1961  NewVAddr)
1962  .addReg(NewVAddrLo)
1963  .addImm(AMDGPU::sub0)
1964  .addReg(NewVAddrHi)
1965  .addImm(AMDGPU::sub1);
1966 
1967 
1968  // Update the instruction to use NewVaddr
1969  VAddr->setReg(NewVAddr);
1970  // Update the instruction to use NewSRsrc
1971  SRsrc->setReg(NewSRsrc);
1972  }
1973 }
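A sketch of the pointer arithmetic the ADDR64 path above performs with V_ADD_I32 / V_ADDC_U32, assuming plain C++ integers model the two 32-bit halves and the carry kept in VCC:

#include <cstdint>

static uint64_t addRsrcPtrToVAddr(uint64_t SRsrcPtr, uint64_t VAddr) {
  uint32_t Lo = static_cast<uint32_t>(SRsrcPtr) + static_cast<uint32_t>(VAddr);
  bool Carry = Lo < static_cast<uint32_t>(SRsrcPtr);        // VCC after V_ADD_I32
  uint32_t Hi = static_cast<uint32_t>(SRsrcPtr >> 32) +
                static_cast<uint32_t>(VAddr >> 32) + Carry; // V_ADDC_U32
  return (static_cast<uint64_t>(Hi) << 32) | Lo;            // REG_SEQUENCE sub0/sub1
}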
1974 
1975 void SIInstrInfo::splitSMRD(MachineInstr *MI,
1976  const TargetRegisterClass *HalfRC,
1977  unsigned HalfImmOp, unsigned HalfSGPROp,
1978  MachineInstr *&Lo, MachineInstr *&Hi) const {
1979 
1980  DebugLoc DL = MI->getDebugLoc();
1981  MachineBasicBlock *MBB = MI->getParent();
1982  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1983  unsigned RegLo = MRI.createVirtualRegister(HalfRC);
1984  unsigned RegHi = MRI.createVirtualRegister(HalfRC);
1985  unsigned HalfSize = HalfRC->getSize();
1986  const MachineOperand *OffOp =
1987  getNamedOperand(*MI, AMDGPU::OpName::offset);
1988  const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
1989 
1990  // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
1991  // on VI.
1992 
1993  bool IsKill = SBase->isKill();
1994  if (OffOp) {
1995  bool isVI =
1996  MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
1997  AMDGPUSubtarget::VOLCANIC_ISLANDS;
1998  unsigned OffScale = isVI ? 1 : 4;
1999  // Handle the _IMM variant
2000  unsigned LoOffset = OffOp->getImm() * OffScale;
2001  unsigned HiOffset = LoOffset + HalfSize;
2002  Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
2003  // Use addReg instead of addOperand
2004  // to make sure kill flag is cleared.
2005  .addReg(SBase->getReg(), 0, SBase->getSubReg())
2006  .addImm(LoOffset / OffScale);
2007 
2008  if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
2009  unsigned OffsetSGPR =
2010  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2011  BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
2012  .addImm(HiOffset); // The offset in register is in bytes.
2013  Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
2014  .addReg(SBase->getReg(), getKillRegState(IsKill),
2015  SBase->getSubReg())
2016  .addReg(OffsetSGPR);
2017  } else {
2018  Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
2019  .addReg(SBase->getReg(), getKillRegState(IsKill),
2020  SBase->getSubReg())
2021  .addImm(HiOffset / OffScale);
2022  }
2023  } else {
2024  // Handle the _SGPR variant
2025  MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
2026  Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
2027  .addReg(SBase->getReg(), 0, SBase->getSubReg())
2028  .addOperand(*SOff);
2029  unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2030  BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
2031  .addOperand(*SOff)
2032  .addImm(HalfSize);
2033  Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
2034  .addReg(SBase->getReg(), getKillRegState(IsKill),
2035  SBase->getSubReg())
2036  .addReg(OffsetSGPR);
2037  }
2038 
2039  unsigned SubLo, SubHi;
2040  switch (HalfSize) {
2041  case 4:
2042  SubLo = AMDGPU::sub0;
2043  SubHi = AMDGPU::sub1;
2044  break;
2045  case 8:
2046  SubLo = AMDGPU::sub0_sub1;
2047  SubHi = AMDGPU::sub2_sub3;
2048  break;
2049  case 16:
2050  SubLo = AMDGPU::sub0_sub1_sub2_sub3;
2051  SubHi = AMDGPU::sub4_sub5_sub6_sub7;
2052  break;
2053  case 32:
2054  SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
2055  SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
2056  break;
2057  default:
2058  llvm_unreachable("Unhandled HalfSize");
2059  }
2060 
2061  BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
2062  .addOperand(MI->getOperand(0))
2063  .addReg(RegLo)
2064  .addImm(SubLo)
2065  .addReg(RegHi)
2066  .addImm(SubHi);
2067 }
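A sketch of the offset arithmetic used when splitting an SMRD load in half, assuming plain C++ and an IsVI flag mirroring the generation check above; offsets are tracked in bytes internally, with SI immediates expressed in dwords:

#include <cstdint>

struct HalfLoadOffsets { uint64_t LoImm, HiImm; bool HiFitsImm; };

static HalfLoadOffsets splitSMRDOffsets(uint64_t ImmOff, unsigned HalfSizeBytes,
                                        bool IsVI) {
  unsigned OffScale = IsVI ? 1 : 4;          // SI immediates count dwords
  uint64_t LoBytes = ImmOff * OffScale;
  uint64_t HiBytes = LoBytes + HalfSizeBytes;
  HalfLoadOffsets R;
  R.LoImm = LoBytes / OffScale;
  R.HiImm = HiBytes / OffScale;
  // 8 bits of dwords on SI, 20 bits of bytes on VI; otherwise fall back to
  // the _SGPR form with the offset materialized in a register.
  R.HiFitsImm = IsVI ? HiBytes < (1u << 20) : R.HiImm < (1u << 8);
  return R;
}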
2068 
2069 void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
2070  MachineBasicBlock *MBB = MI->getParent();
2071  switch (MI->getOpcode()) {
2072  case AMDGPU::S_LOAD_DWORD_IMM:
2073  case AMDGPU::S_LOAD_DWORD_SGPR:
2074  case AMDGPU::S_LOAD_DWORDX2_IMM:
2075  case AMDGPU::S_LOAD_DWORDX2_SGPR:
2076  case AMDGPU::S_LOAD_DWORDX4_IMM:
2077  case AMDGPU::S_LOAD_DWORDX4_SGPR: {
2078  unsigned NewOpcode = getVALUOp(*MI);
2079  unsigned RegOffset;
2080  unsigned ImmOffset;
2081 
2082  if (MI->getOperand(2).isReg()) {
2083  RegOffset = MI->getOperand(2).getReg();
2084  ImmOffset = 0;
2085  } else {
2086  assert(MI->getOperand(2).isImm());
2087  // SMRD instructions take a dword offset on SI and a byte offset on VI,
2088  // while MUBUF instructions always take a byte offset.
2089  ImmOffset = MI->getOperand(2).getImm();
2090  if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
2091  AMDGPUSubtarget::SEA_ISLANDS)
2092  ImmOffset <<= 2;
2093  RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2094 
2095  if (isUInt<12>(ImmOffset)) {
2096  BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2097  RegOffset)
2098  .addImm(0);
2099  } else {
2100  BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2101  RegOffset)
2102  .addImm(ImmOffset);
2103  ImmOffset = 0;
2104  }
2105  }
2106 
2107  unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
2108  unsigned DWord0 = RegOffset;
2109  unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2110  unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2111  unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2112  uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
2113 
2114  BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
2115  .addImm(0);
2116  BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
2117  .addImm(RsrcDataFormat & 0xFFFFFFFF);
2118  BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
2119  .addImm(RsrcDataFormat >> 32);
2120  BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
2121  .addReg(DWord0)
2122  .addImm(AMDGPU::sub0)
2123  .addReg(DWord1)
2124  .addImm(AMDGPU::sub1)
2125  .addReg(DWord2)
2126  .addImm(AMDGPU::sub2)
2127  .addReg(DWord3)
2128  .addImm(AMDGPU::sub3);
2129  MI->setDesc(get(NewOpcode));
2130  if (MI->getOperand(2).isReg()) {
2131  MI->getOperand(2).setReg(SRsrc);
2132  } else {
2133  MI->getOperand(2).ChangeToRegister(SRsrc, false);
2134  }
2135  MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
2136  MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
2137  MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
2138  MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
2139  MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
2140 
2141  const TargetRegisterClass *NewDstRC =
2142  RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
2143 
2144  unsigned DstReg = MI->getOperand(0).getReg();
2145  unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
2146  MRI.replaceRegWith(DstReg, NewDstReg);
2147  break;
2148  }
2149  case AMDGPU::S_LOAD_DWORDX8_IMM:
2150  case AMDGPU::S_LOAD_DWORDX8_SGPR: {
2151  MachineInstr *Lo, *Hi;
2152  splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
2153  AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
2154  MI->eraseFromParent();
2155  moveSMRDToVALU(Lo, MRI);
2156  moveSMRDToVALU(Hi, MRI);
2157  break;
2158  }
2159 
2160  case AMDGPU::S_LOAD_DWORDX16_IMM:
2161  case AMDGPU::S_LOAD_DWORDX16_SGPR: {
2162  MachineInstr *Lo, *Hi;
2163  splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
2164  AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
2165  MI->eraseFromParent();
2166  moveSMRDToVALU(Lo, MRI);
2167  moveSMRDToVALU(Hi, MRI);
2168  break;
2169  }
2170  }
2171 }
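A one-line sketch of the offset rescaling above, assuming a boolean stands in for the generation check: SMRD immediates are dword-granular up to Sea Islands while MUBUF offsets are always in bytes, so pre-VI offsets are multiplied by four before the conversion.

#include <cstdint>

static uint64_t smrdToMubufOffset(uint64_t ImmOffset, bool IsSeaIslandsOrOlder) {
  return IsSeaIslandsOrOlder ? ImmOffset << 2 : ImmOffset;
}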
2172 
2173 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
2174  SmallVector<MachineInstr *, 128> Worklist;
2175  Worklist.push_back(&TopInst);
2176 
2177  while (!Worklist.empty()) {
2178  MachineInstr *Inst = Worklist.pop_back_val();
2179  MachineBasicBlock *MBB = Inst->getParent();
2180  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2181 
2182  unsigned Opcode = Inst->getOpcode();
2183  unsigned NewOpcode = getVALUOp(*Inst);
2184 
2185  // Handle some special cases
2186  switch (Opcode) {
2187  default:
2188  if (isSMRD(Inst->getOpcode())) {
2189  moveSMRDToVALU(Inst, MRI);
2190  }
2191  break;
2192  case AMDGPU::S_MOV_B64: {
2193  DebugLoc DL = Inst->getDebugLoc();
2194 
2195  // If the source operand is a register we can replace this with a
2196  // copy.
2197  if (Inst->getOperand(1).isReg()) {
2198  MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
2199  .addOperand(Inst->getOperand(0))
2200  .addOperand(Inst->getOperand(1));
2201  Worklist.push_back(Copy);
2202  } else {
2203  // Otherwise, we need to split this into two movs, because there is
2204  // no 64-bit VALU move instruction.
2205  unsigned Reg = Inst->getOperand(0).getReg();
2206  unsigned Dst = split64BitImm(Worklist,
2207  Inst,
2208  MRI,
2209  MRI.getRegClass(Reg),
2210  Inst->getOperand(1));
2211  MRI.replaceRegWith(Reg, Dst);
2212  }
2213  Inst->eraseFromParent();
2214  continue;
2215  }
2216  case AMDGPU::S_AND_B64:
2217  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
2218  Inst->eraseFromParent();
2219  continue;
2220 
2221  case AMDGPU::S_OR_B64:
2222  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
2223  Inst->eraseFromParent();
2224  continue;
2225 
2226  case AMDGPU::S_XOR_B64:
2227  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
2228  Inst->eraseFromParent();
2229  continue;
2230 
2231  case AMDGPU::S_NOT_B64:
2232  splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
2233  Inst->eraseFromParent();
2234  continue;
2235 
2236  case AMDGPU::S_BCNT1_I32_B64:
2237  splitScalar64BitBCNT(Worklist, Inst);
2238  Inst->eraseFromParent();
2239  continue;
2240 
2241  case AMDGPU::S_BFE_I64: {
2242  splitScalar64BitBFE(Worklist, Inst);
2243  Inst->eraseFromParent();
2244  continue;
2245  }
2246 
2247  case AMDGPU::S_LSHL_B32:
2248  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2249  NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
2250  swapOperands(Inst);
2251  }
2252  break;
2253  case AMDGPU::S_ASHR_I32:
2254  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2255  NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
2256  swapOperands(Inst);
2257  }
2258  break;
2259  case AMDGPU::S_LSHR_B32:
2260  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2261  NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
2262  swapOperands(Inst);
2263  }
2264  break;
2265  case AMDGPU::S_LSHL_B64:
2266  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2267  NewOpcode = AMDGPU::V_LSHLREV_B64;
2268  swapOperands(Inst);
2269  }
2270  break;
2271  case AMDGPU::S_ASHR_I64:
2272  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2273  NewOpcode = AMDGPU::V_ASHRREV_I64;
2274  swapOperands(Inst);
2275  }
2276  break;
2277  case AMDGPU::S_LSHR_B64:
2278  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2279  NewOpcode = AMDGPU::V_LSHRREV_B64;
2280  swapOperands(Inst);
2281  }
2282  break;
2283 
2284  case AMDGPU::S_BFE_U64:
2285  case AMDGPU::S_BFM_B64:
2286  llvm_unreachable("Moving this op to VALU not implemented");
2287  }
2288 
2289  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
2290  // We cannot move this instruction to the VALU, so we should try to
2291  // legalize its operands instead.
2292  legalizeOperands(Inst);
2293  continue;
2294  }
2295 
2296  // Use the new VALU Opcode.
2297  const MCInstrDesc &NewDesc = get(NewOpcode);
2298  Inst->setDesc(NewDesc);
2299 
2300  // Remove any references to SCC. Vector instructions can't read from it, and
2301  // we're just about to add the implicit use / defs of VCC, and we don't want
2302  // both.
2303  for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
2304  MachineOperand &Op = Inst->getOperand(i);
2305  if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
2306  Inst->RemoveOperand(i);
2307  }
2308 
2309  if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
2310  // We are converting these to a BFE, so we need to add the missing
2311  // operands for the size and offset.
2312  unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
2313  Inst->addOperand(MachineOperand::CreateImm(0));
2314  Inst->addOperand(MachineOperand::CreateImm(Size));
2315 
2316  } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
2317  // The VALU version adds the second operand to the result, so insert an
2318  // extra 0 operand.
2319  Inst->addOperand(MachineOperand::CreateImm(0));
2320  }
2321 
2322  addDescImplicitUseDef(NewDesc, Inst);
2323 
2324  if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
2325  const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
2326  // If we need to move this to VGPRs, we need to unpack the second operand
2327  // back into the 2 separate ones for bit offset and width.
2328  assert(OffsetWidthOp.isImm() &&
2329  "Scalar BFE is only implemented for constant width and offset");
2330  uint32_t Imm = OffsetWidthOp.getImm();
2331 
2332  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2333  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2334  Inst->RemoveOperand(2); // Remove old immediate.
2335  Inst->addOperand(MachineOperand::CreateImm(Offset));
2336  Inst->addOperand(MachineOperand::CreateImm(BitWidth));
2337  }
2338 
2339  // Update the destination register class.
2340 
2341  const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
2342 
2343  switch (Opcode) {
2344  // For target instructions, getOpRegClass just returns the virtual
2345  // register class associated with the operand, so we need to find an
2346  // equivalent VGPR register class in order to move the instruction to the
2347  // VALU.
2348  case AMDGPU::COPY:
2349  case AMDGPU::PHI:
2350  case AMDGPU::REG_SEQUENCE:
2351  case AMDGPU::INSERT_SUBREG:
2352  if (RI.hasVGPRs(NewDstRC))
2353  continue;
2354  NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
2355  if (!NewDstRC)
2356  continue;
2357  break;
2358  default:
2359  break;
2360  }
2361 
2362  unsigned DstReg = Inst->getOperand(0).getReg();
2363  unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
2364  MRI.replaceRegWith(DstReg, NewDstReg);
2365 
2366  // Legalize the operands
2367  legalizeOperands(Inst);
2368 
2369  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
2370  E = MRI.use_end(); I != E; ++I) {
2371  MachineInstr &UseMI = *I->getParent();
2372  if (!canReadVGPR(UseMI, I.getOperandNo())) {
2373  Worklist.push_back(&UseMI);
2374  }
2375  }
2376  }
2377 }
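A sketch of the scalar BFE immediate layout unpacked in the loop above (helper names are illustrative): the bit offset lives in bits [5:0] and the field width in bits [22:16] of a single immediate, while the VALU form wants them as two separate operands.

#include <cstdint>

static uint32_t packScalarBFE(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | ((Width & 0x7f) << 16);
}

static void unpackScalarBFE(uint32_t Imm, uint32_t &Offset, uint32_t &Width) {
  Offset = Imm & 0x3f;             // bits [5:0]
  Width  = (Imm & 0x7f0000) >> 16; // bits [22:16]
}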
2378 
2379 //===----------------------------------------------------------------------===//
2380 // Indirect addressing callbacks
2381 //===----------------------------------------------------------------------===//
2382 
2383 unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
2384  unsigned Channel) const {
2385  assert(Channel == 0);
2386  return RegIndex;
2387 }
2388 
2389 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
2390  return &AMDGPU::VGPR_32RegClass;
2391 }
2392 
2393 void SIInstrInfo::splitScalar64BitUnaryOp(
2394  SmallVectorImpl<MachineInstr *> &Worklist,
2395  MachineInstr *Inst,
2396  unsigned Opcode) const {
2397  MachineBasicBlock &MBB = *Inst->getParent();
2398  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2399 
2400  MachineOperand &Dest = Inst->getOperand(0);
2401  MachineOperand &Src0 = Inst->getOperand(1);
2402  DebugLoc DL = Inst->getDebugLoc();
2403 
2404  MachineBasicBlock::iterator MII = Inst;
2405 
2406  const MCInstrDesc &InstDesc = get(Opcode);
2407  const TargetRegisterClass *Src0RC = Src0.isReg() ?
2408  MRI.getRegClass(Src0.getReg()) :
2409  &AMDGPU::SGPR_32RegClass;
2410 
2411  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
2412 
2413  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2414  AMDGPU::sub0, Src0SubRC);
2415 
2416  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
2417  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
2418 
2419  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
2420  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
2421  .addOperand(SrcReg0Sub0);
2422 
2423  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2424  AMDGPU::sub1, Src0SubRC);
2425 
2426  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
2427  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
2428  .addOperand(SrcReg0Sub1);
2429 
2430  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
2431  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2432  .addReg(DestSub0)
2433  .addImm(AMDGPU::sub0)
2434  .addReg(DestSub1)
2435  .addImm(AMDGPU::sub1);
2436 
2437  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2438 
2439  // Try to legalize the operands in case we need to swap the order to keep it
2440  // valid.
2441  Worklist.push_back(LoHalf);
2442  Worklist.push_back(HiHalf);
2443 }
2444 
2445 void SIInstrInfo::splitScalar64BitBinaryOp(
2446  SmallVectorImpl<MachineInstr *> &Worklist,
2447  MachineInstr *Inst,
2448  unsigned Opcode) const {
2449  MachineBasicBlock &MBB = *Inst->getParent();
2450  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2451 
2452  MachineOperand &Dest = Inst->getOperand(0);
2453  MachineOperand &Src0 = Inst->getOperand(1);
2454  MachineOperand &Src1 = Inst->getOperand(2);
2455  DebugLoc DL = Inst->getDebugLoc();
2456 
2457  MachineBasicBlock::iterator MII = Inst;
2458 
2459  const MCInstrDesc &InstDesc = get(Opcode);
2460  const TargetRegisterClass *Src0RC = Src0.isReg() ?
2461  MRI.getRegClass(Src0.getReg()) :
2462  &AMDGPU::SGPR_32RegClass;
2463 
2464  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
2465  const TargetRegisterClass *Src1RC = Src1.isReg() ?
2466  MRI.getRegClass(Src1.getReg()) :
2467  &AMDGPU::SGPR_32RegClass;
2468 
2469  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
2470 
2471  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2472  AMDGPU::sub0, Src0SubRC);
2473  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2474  AMDGPU::sub0, Src1SubRC);
2475 
2476  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
2477  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
2478 
2479  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
2480  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
2481  .addOperand(SrcReg0Sub0)
2482  .addOperand(SrcReg1Sub0);
2483 
2484  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2485  AMDGPU::sub1, Src0SubRC);
2486  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2487  AMDGPU::sub1, Src1SubRC);
2488 
2489  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
2490  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
2491  .addOperand(SrcReg0Sub1)
2492  .addOperand(SrcReg1Sub1);
2493 
2494  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
2495  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2496  .addReg(DestSub0)
2497  .addImm(AMDGPU::sub0)
2498  .addReg(DestSub1)
2499  .addImm(AMDGPU::sub1);
2500 
2501  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2502 
2503  // Try to legalize the operands in case we need to swap the order to keep it
2504  // valid.
2505  Worklist.push_back(LoHalf);
2506  Worklist.push_back(HiHalf);
2507 }
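A sketch of why the 64-bit scalar bitwise ops can be split per half, assuming plain C++ on uint64_t: AND, OR and XOR produce each output bit independently, so the low and high 32 bits can be computed separately and recombined exactly as the REG_SEQUENCE above does.

#include <cstdint>

static uint64_t and64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t Lo = static_cast<uint32_t>(A) & static_cast<uint32_t>(B);
  uint32_t Hi = static_cast<uint32_t>(A >> 32) & static_cast<uint32_t>(B >> 32);
  return (static_cast<uint64_t>(Hi) << 32) | Lo; // sub0 / sub1 recombined
}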
2508 
2509 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
2510  MachineInstr *Inst) const {
2511  MachineBasicBlock &MBB = *Inst->getParent();
2512  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2513 
2514  MachineBasicBlock::iterator MII = Inst;
2515  DebugLoc DL = Inst->getDebugLoc();
2516 
2517  MachineOperand &Dest = Inst->getOperand(0);
2518  MachineOperand &Src = Inst->getOperand(1);
2519 
2520  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
2521  const TargetRegisterClass *SrcRC = Src.isReg() ?
2522  MRI.getRegClass(Src.getReg()) :
2523  &AMDGPU::SGPR_32RegClass;
2524 
2525  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2526  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2527 
2528  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
2529 
2530  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2531  AMDGPU::sub0, SrcSubRC);
2532  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2533  AMDGPU::sub1, SrcSubRC);
2534 
2535  MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
2536  .addOperand(SrcRegSub0)
2537  .addImm(0);
2538 
2539  MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
2540  .addOperand(SrcRegSub1)
2541  .addReg(MidReg);
2542 
2543  MRI.replaceRegWith(Dest.getReg(), ResultReg);
2544 
2545  Worklist.push_back(First);
2546  Worklist.push_back(Second);
2547 }
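A sketch of the 64-bit population count above, with std::bitset standing in for V_BCNT_U32_B32: the second V_BCNT receives the first result through its add operand, so the total is popcount(lo) + popcount(hi).

#include <bitset>
#include <cstdint>

static unsigned bcnt64ViaHalves(uint64_t X) {
  unsigned Mid = std::bitset<32>(static_cast<uint32_t>(X)).count();     // first V_BCNT, adds 0
  return std::bitset<32>(static_cast<uint32_t>(X >> 32)).count() + Mid; // second V_BCNT, adds Mid
}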
2548 
2549 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
2550  MachineInstr *Inst) const {
2551  MachineBasicBlock &MBB = *Inst->getParent();
2552  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2553  MachineBasicBlock::iterator MII = Inst;
2554  DebugLoc DL = Inst->getDebugLoc();
2555 
2556  MachineOperand &Dest = Inst->getOperand(0);
2557  uint32_t Imm = Inst->getOperand(2).getImm();
2558  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2559  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2560 
2561  (void) Offset;
2562 
2563  // Only sext_inreg cases handled.
2564  assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
2565  BitWidth <= 32 &&
2566  Offset == 0 &&
2567  "Not implemented");
2568 
2569  if (BitWidth < 32) {
2570  unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2571  unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2572  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2573 
2574  BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
2575  .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
2576  .addImm(0)
2577  .addImm(BitWidth);
2578 
2579  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
2580  .addImm(31)
2581  .addReg(MidRegLo);
2582 
2583  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2584  .addReg(MidRegLo)
2585  .addImm(AMDGPU::sub0)
2586  .addReg(MidRegHi)
2587  .addImm(AMDGPU::sub1);
2588 
2589  MRI.replaceRegWith(Dest.getReg(), ResultReg);
2590  return;
2591  }
2592 
2593  MachineOperand &Src = Inst->getOperand(1);
2594  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2595  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2596 
2597  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
2598  .addImm(31)
2599  .addReg(Src.getReg(), 0, AMDGPU::sub0);
2600 
2601  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2602  .addReg(Src.getReg(), 0, AMDGPU::sub0)
2603  .addImm(AMDGPU::sub0)
2604  .addReg(TmpReg)
2605  .addImm(AMDGPU::sub1);
2606 
2607  MRI.replaceRegWith(Dest.getReg(), ResultReg);
2608 }
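A sketch of the sign-extend-in-register split handled above for widths below 32, assuming two's-complement C++ integers: the low half is the 32-bit V_BFE_I32 sign extension and the high half is that result shifted right arithmetically by 31.

#include <cstdint>

static uint64_t sextInReg64ViaHalves(uint64_t X, unsigned Width) {
  // Width is assumed to be in (0, 32), matching the case handled above.
  uint32_t LoBits = static_cast<uint32_t>(X);
  int32_t Lo = static_cast<int32_t>(LoBits << (32 - Width)) >> (32 - Width); // V_BFE_I32
  int32_t Hi = Lo >> 31;                                                      // V_ASHRREV_I32 31, Lo
  return (static_cast<uint64_t>(static_cast<uint32_t>(Hi)) << 32) |
         static_cast<uint32_t>(Lo);
}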
2609 
2610 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
2611  MachineInstr *Inst) const {
2612  // Add the implicit register uses and definitions.
2613  if (NewDesc.ImplicitUses) {
2614  for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
2615  unsigned Reg = NewDesc.ImplicitUses[i];
2616  Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
2617  }
2618  }
2619 
2620  if (NewDesc.ImplicitDefs) {
2621  for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
2622  unsigned Reg = NewDesc.ImplicitDefs[i];
2623  Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
2624  }
2625  }
2626 }
2627 
2628 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
2629  int OpIndices[3]) const {
2630  const MCInstrDesc &Desc = get(MI->getOpcode());
2631 
2632  // Find the one SGPR operand we are allowed to use.
2633  unsigned SGPRReg = AMDGPU::NoRegister;
2634 
2635  // First we need to consider the instruction's operand requirements before
2636  // legalizing. Some operands are required to be SGPRs, such as implicit uses
2637  // of VCC, but we are still bound by the constant bus requirement to only use
2638  // one.
2639  //
2640  // If the operand's class is an SGPR, we can never move it.
2641 
2642  for (const MachineOperand &MO : MI->implicit_operands()) {
2643  // We only care about reads.
2644  if (MO.isDef())
2645  continue;
2646 
2647  if (MO.getReg() == AMDGPU::VCC)
2648  return AMDGPU::VCC;
2649 
2650  if (MO.getReg() == AMDGPU::FLAT_SCR)
2651  return AMDGPU::FLAT_SCR;
2652  }
2653 
2654  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
2655  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2656 
2657  for (unsigned i = 0; i < 3; ++i) {
2658  int Idx = OpIndices[i];
2659  if (Idx == -1)
2660  break;
2661 
2662  const MachineOperand &MO = MI->getOperand(Idx);
2663  if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
2664  SGPRReg = MO.getReg();
2665 
2666  if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
2667  UsedSGPRs[i] = MO.getReg();
2668  }
2669 
2670  if (SGPRReg != AMDGPU::NoRegister)
2671  return SGPRReg;
2672 
2673  // We don't have a required SGPR operand, so we have a bit more freedom in
2674  // selecting operands to move.
2675 
2676  // Try to select the most used SGPR. If an SGPR is equal to one of the
2677  // others, we choose that.
2678  //
2679  // e.g.
2680  // V_FMA_F32 v0, s0, s0, s0 -> No moves
2681  // V_FMA_F32 v0, s0, s1, s0 -> Move s1
2682 
2683  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
2684  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
2685  SGPRReg = UsedSGPRs[0];
2686  }
2687 
2688  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
2689  if (UsedSGPRs[1] == UsedSGPRs[2])
2690  SGPRReg = UsedSGPRs[1];
2691  }
2692 
2693  return SGPRReg;
2694 }
2695 
2696 MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
2697  MachineBasicBlock *MBB,
2698  MachineBasicBlock::iterator I,
2699  unsigned ValueReg,
2700  unsigned Address, unsigned OffsetReg) const {
2701  const DebugLoc &DL = MBB->findDebugLoc(I);
2702  unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
2703  getIndirectIndexBegin(*MBB->getParent()));
2704 
2705  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
2706  .addReg(IndirectBaseReg, RegState::Define)
2707  .addOperand(I->getOperand(0))
2708  .addReg(IndirectBaseReg)
2709  .addReg(OffsetReg)
2710  .addImm(0)
2711  .addReg(ValueReg);
2712 }
2713 
2714 MachineInstrBuilder SIInstrInfo::buildIndirectRead(
2715  MachineBasicBlock *MBB,
2716  MachineBasicBlock::iterator I,
2717  unsigned ValueReg,
2718  unsigned Address, unsigned OffsetReg) const {
2719  const DebugLoc &DL = MBB->findDebugLoc(I);
2720  unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
2721  getIndirectIndexBegin(*MBB->getParent()));
2722 
2723  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
2724  .addOperand(I->getOperand(0))
2725  .addOperand(I->getOperand(1))
2726  .addReg(IndirectBaseReg)
2727  .addReg(OffsetReg)
2728  .addImm(0);
2729 
2730 }
2731 
2732 void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
2733  const MachineFunction &MF) const {
2734  int End = getIndirectIndexEnd(MF);
2735  int Begin = getIndirectIndexBegin(MF);
2736 
2737  if (End == -1)
2738  return;
2739 
2740 
2741  for (int Index = Begin; Index <= End; ++Index)
2742  Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
2743 
2744  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
2745  Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
2746 
2747  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
2748  Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
2749 
2750  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
2751  Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
2752 
2753  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
2754  Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
2755 
2756  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
2757  Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
2758 }
2759 
2760 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
2761  unsigned OperandName) const {
2762  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
2763  if (Idx == -1)
2764  return nullptr;
2765 
2766  return &MI.getOperand(Idx);
2767 }
2768 
2769 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
2770  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
2771  if (ST.isAmdHsaOS()) {
2772  RsrcDataFormat |= (1ULL << 56);
2773 
2774  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
2775  // Set MTYPE = 2
2776  RsrcDataFormat |= (2ULL << 59);
2777  }
2778 
2779  return RsrcDataFormat;
2780 }